/*******************************************************************************

	"Road Endpoints and City Sizes"
	
	Replication Package
	
	Data Preparation Do-File
	
NOTE: this do-file may take a couple of hours to run

*******************************************************************************/

*start with no data in memory:
clear

*working directory: replace the placeholder with the folder that holds this do-file
cd "WRITE HERE"

*folders for inputs and outputs, relative to the working directory:
global data "Data"
global workingdata "Working Data"

/*install packages used in this project:
ssc install shapley2
ssc install acreg
ssc install psacalc
ssc install geonear
ssc install geodist
ssc install boottest
ssc install ranktest
ssc install ivreg2
*/

********************************************************************************
*1) PREPARE THE RAILROAD STATION DATA
********************************************************************************

*************
*IMPORT DATA
*************

*read the raw station table with every column as text:
import delimited "$data/rail_stations.csv", delimiters(";") stringcols(_all) clear

*convert the numeric columns in a single call:
destring yearf xcoord ycoord endpoint cia, replace

*first snapshot of the station file:
save "$workingdata/rail_stations.dta", replace

**********************
*ADD THE COMPANY NAME
**********************

*the company code becomes company_nr, and a text name is attached to it:
rename cia company_nr
gen company_name = ""

*the names are listed in the order of their company number (1 to 8):
local nr = 0
foreach nm in "São Paulo Railway (SPR)" "Paulista (CPEF)" "Sorocabana (EFS)" ///
	"Mogiana (CMEF)" "Araraquarense (EFA)" "Nororeste do Brasil (NOB)" ///
	"São Paulo-Minas (SPM)" "São Paulo-Paraná (SPP)" {
	local ++nr
	replace company_name = "`nm'" if company_nr == `nr'
}
	
***********************************************
*GENERATE THE YEAR THE RAILROAD PASSED THROUGH 
***********************************************

*Purpose: year starts as the inauguration year (yearf) of each station and is
*lowered to the earliest year found among the stations that list it as their
*previous station (variable from). The pass over all stations is repeated until
*every station is flagged as processed (looped equal to 1) or 150 passes are done.

*start from the inauguration year:
gen year=.
replace year = yearf

*numeric station id; the loop below relies on observation i being station i
*once the data are sorted by id_nr:
egen id_nr = group(id)
sort id_nr

*number of stations, read from the data instead of being hard-coded
*(the original script assumed 340):
quietly summarize id_nr
local n_st = r(max)

*processed flag: stations marked as endpoint start as processed:
gen looped = endpoint
quietly summarize looped
local sumch = r(sum)
local j=0
di "	***		Sum looped = `sumch'		, it.=`j'	***"
while `j'<150 & `sumch'<`n_st' {

	*one pass over all stations:
	forvalues i=1/`n_st' {

		*earliest year among the stations whose previous station is station i:
		sort id_nr
		quietly gen year_aux1 = year if from==id[`i']
		quietly egen year_aux2 = min(year_aux1)
		*every observation other than station i gets the sentinel 9999, so the
		*next line lowers a non-missing year only for station i; an observation
		*whose year is still missing is also set to 9999 there, because a missing
		*value compares as larger than any number:
		quietly replace year_aux2 = 9999 if id_nr!=`i'
		quietly replace year = year_aux2 if year_aux2<year & year_aux2!=.
		drop year_aux*

		*station i becomes processed once the mean of looped over the stations
		*that follow it equals 1, i.e. all of them are processed; the value 2 on
		*the other observations keeps them untouched:
		sort id_nr
		quietly gen looped_aux1 = looped if from==id[`i']
		quietly egen looped_aux2 = mean(looped_aux1)
		quietly replace looped_aux2 = 2 if id_nr!=`i'
		quietly replace looped = 1 if looped_aux2==1
		drop looped_aux*

	}

	*update the number of processed stations and the pass counter:
	quietly summarize looped
	local sumch = r(sum)
	local j=`j'+1
	di "	***		Sum looped = `sumch'		, it.=`j'	***"

}

drop looped

*saving:
save "$workingdata/rail_stations.dta", replace

*************************************
*GENERATE DISTANCE TO ESTACAO DA LUZ
*************************************

*Purpose: builds dist_from (distance to the previous station), ldist_to_luz
*(straight-line distance to the station with id 1, treated here as Estacao da
*Luz), and the cumulative dist_to_luz and stations_to_luz obtained by walking
*the chain of previous stations outward from that station.

*lookup table of every station in its role as a previous station:
preserve
keep id id_nr *coord
rename id from
rename id_nr from_nr
rename xcoord xcoord_from
rename ycoord ycoord_from
save "$workingdata/rail_stations_from.dta", replace
restore

*attach the previous station's numeric id and coordinates:
merge m:1 from using "$workingdata/rail_stations_from.dta"
drop if _merge==2
drop _merge

*station 1 uses its own coordinates as origin, so its dist_from is zero:
replace xcoord_from = xcoord if id=="1"
replace ycoord_from = ycoord if id=="1"

*distance to the previous station:
geodist ycoord xcoord ycoord_from xcoord_from, generate(dist_from)
drop ycoord_* xcoord_*

*straight-line distance to station 1 (its coordinates are spread to all rows):
foreach v in xcoord ycoord {
	gen aux = `v' if id=="1"
	egen `v'_luz = mean(aux)
	drop aux
}
geodist ycoord xcoord ycoord_luz xcoord_luz, generate(ldist_to_luz)
drop ycoord_* xcoord_*

*cumulative measures through rail, known only for station 1 at the start:
gen stations_to_luz=.
replace stations_to_luz = 0 if id=="1"
gen dist_to_luz=.
replace dist_to_luz = 0 if id=="1"

*number of stations, read from the data instead of being hard-coded
*(the original script assumed 340):
quietly summarize id_nr
local n_st = r(max)

*processed flag: only station 1 is processed at the start:
gen looped = 0
replace looped = 1 if id=="1"
quietly summarize looped
local sumch = r(sum)
local j=0
di "	***		Sum looped = `sumch'		, it.=`j'	***"

*repeat passes until all stations are processed or 200 passes are done:
while `j'<200 & `sumch'<`n_st' {

	forvalues i=1/`n_st' {

		sort id_nr

		*value at the previous station of station i, kept on row i only:
		quietly gen dist_aux1 = dist_to_luz if id==from[`i']
		quietly egen dist_aux2 = mean(dist_aux1)
		quietly replace dist_aux2 = . if id_nr!=`i'
		quietly replace dist_to_luz = dist_from+dist_aux2 if dist_aux2!=. & looped==0

		*same for the count of stations:
		quietly gen stations_aux1 = stations_to_luz if id==from[`i']
		quietly egen stations_aux2 = mean(stations_aux1)
		quietly replace stations_aux2 = . if id_nr!=`i'
		quietly replace stations_to_luz = 1+stations_aux2 if stations_aux2!=. & looped==0

		*station i is processed once its previous station has a distance:
		quietly replace looped=1 if dist_aux2!=.
		drop dist_aux* stations_aux*

	}

	*update the number of processed stations and the pass counter:
	quietly summarize looped
	local sumch = r(sum)
	local j=`j'+1
	di "	***		Sum looped = `sumch'		, it.=`j'	***"

}

drop looped

***************************
*PREPARE YEARS AS ENDPOINT
***************************

*Purpose: for every station, year_next is the earliest arrival year among the
*stations that list it as their previous station, and dist_next is the mean
*dist_from of the stations reached in that earliest year. time_as_endpoint is
*the gap between year_next and the station's own year.

gen year_next = .
gen dist_next = .

*the loop relies on observation i being station i:
sort id_nr

*number of stations, read from the data instead of being hard-coded
*(the original script assumed 340):
quietly summarize id_nr
local n_st = r(max)

forvalues i=1/`n_st' {

	*earliest year among the stations that follow station i:
	quietly gen year_aux1 = year if from==id[`i']
	quietly egen year_aux2 = min(year_aux1)

	*distance to the follower(s) reached in that earliest year:
	quietly gen dist_next_aux1 = dist_from if from==id[`i'] & year_aux1==year_aux2
	quietly egen dist_next_aux2 = mean(dist_next_aux1)

	*store both values on the row of station i:
	quietly replace year_next = year_aux2 if id_nr==`i'
	quietly replace dist_next = dist_next_aux2 if id_nr==`i'

	drop year_aux* dist_next_aux*

}

*generate time as endpoint:
gen time_as_endpoint = year_next - year


*********************
*LABELING AND SAVING
*********************

*the municipality code that came with the station table gets its own name:
rename cod_municipio cod_municipio_antt

*identifiers:
label variable id "Station ID"
label variable id_nr "Station ID, numeric"
label variable code_antt "Code of station, ANTT shapefile"
label variable name "Name of the station"
label variable cod_municipio_antt "Municipality code, IBGE (from ANTT shapefile)"
label variable company_nr "Company, numeric"
label variable company_name "Company"

*timing:
label variable yearf "Inauguration year"
label variable year "Year of railroad arrival"
label variable year_next "Year of railroad arrival at the next station"
label variable time_as_endpoint "Years as endpoint"
label variable endpoint "Endpoint station"

*position on the network:
label variable from "Previous station"
label variable from_nr "Previous station, numeric"
label variable dist_from "Distance from previous station"
label variable dist_next "Distance to next (first built) station"
label variable dist_to_luz "Distance to Luz, through rail"
label variable ldist_to_luz "Distance to Luz, linear"
label variable stations_to_luz "Stations to Luz"
label variable xcoord "Longitude"
label variable ycoord "Latitude"

*store the labelled station file:
compress
save "$workingdata/rail_stations.dta", replace

*************************************
*ADD DISTRICT AND MUNICIPALITY CODES
*************************************

*load the station-to-district crosswalk:
import delimited "$data/station_to_district.csv", delimiters(";") clear

*the station identifier is called number in the crosswalk:
*NOTE(review): id is compared as a string elsewhere in this file, while this
*import does not force string columns; confirm the key types match in the merge.
rename number id

*the first seven characters of the district code are kept as municipality code:
tostring cod_distrito, replace
gen cod_municipio = substr(cod_distrito, 1, 7)
label variable cod_municipio "Municipality code, IBGE (from spatial merge)"
label variable cod_distrito "District code, IBGE"

*attach the station data, one row per station id, keeping all rows:
merge 1:1 id using "$workingdata/rail_stations.dta", nogenerate

*overwrite the station file with the enriched version and list its contents:
compress
save "$workingdata/rail_stations.dta", replace
describe

*the lookup table of previous stations is no longer needed:
erase "$workingdata/rail_stations_from.dta"

********************************************************************************
*2) PREPARE MUNICIPALITY LEVEL DATASET
********************************************************************************

**********************************
*DEFINE THE PREVIOUS MUNICIPALITY
**********************************

*Purpose: starting from the station file, find the municipality of the previous
*station, flag the first station(s) of each municipality, and collapse the
*station rows to one row per municipality.

*import railroad data:
use "$workingdata/rail_stations.dta", clear

*merge Estiva Gerbi (as part of Mogi-Guacu; see Online Appendix):
replace cod_municipio="3530706" if cod_municipio=="3557303"

*after sorting, observation number equals id_nr, so from_nr can be used as a
*row index to read the municipality of the previous station:
sort id_nr
gen previous_mun = ""
replace previous_mun = cod_municipio[from_nr] if from_nr!=.

*replace to missing if the same:
replace previous_mun="" if cod_municipio==previous_mun

********************************************
*DEFINE VARIABLES OF ONLY THE FIRST STATION
********************************************

*numeric code for the municipalities; the number of groups is read from the
*data instead of being hard-coded (the original script looped up to 220):
egen mun_id = group(cod_municipio)
quietly summarize mun_id
local n_mun = r(max)

*flag the stations built in the first arrival year of their municipality,
*provided the previous station lies in a different municipality:
gen first_station_mun = 0
forvalues o=1/`n_mun' {

	*identify the year of first arrival:
	quietly gen aux1 = year if mun_id==`o'
	quietly egen aux2 = min(aux1)
	quietly replace first_station_mun = 1 if mun_id==`o' & previous_mun!="" & year==aux2
	drop aux1 aux2
}

*update for the Mato Seco station (in Mogi-Guaçu, after Estiva Gerbi):
replace first_station_mun = 0 if id=="306"
*the company name is kept only on first stations, so that the collapse below
*picks it from one of them:
replace company_name="" if first_station_mun==0

******************************
*COLLAPSE AT THE MUNICIPALITY
******************************

*keep only the station coordinates:
gen xcoord_rail = xcoord if first_station_mun==1
gen ycoord_rail = ycoord if first_station_mun==1

collapse (min) year dist_to_luz ///
	(max) endpoint year_last=year dist_to_luz_last=dist_to_luz ///
	(firstnm) previous_mun company_nr company_name xcoord_rail ycoord_rail ///
	, by(cod_municipio)

*and save:
save "$workingdata/mun_data.dta", replace

************************************************************************
*ADD OTHER MUNICIPALITY DATA: POPULATION, AREA, GEOGRAPHIC FUNDAMENTALS
************************************************************************

*--- population (2000 and 2010) ---
import delimited "$data/pop_mun.csv", delimiters(";") stringcols(_all) clear
keep cod_municipio pop2010 pop_urban2010 pop2000 pop_urban2000
destring pop2000 pop_urban2000 pop2010 pop_urban2010, replace

*2010 is the baseline year, so its variables lose the year suffix:
rename (pop_urban2010 pop2010) (pop_urban pop)
save "$workingdata/mun_aux.dta", replace

*--- municipality area ---
import delimited "$data/mun_area.csv", delimiters(";") stringcols(_all) case(preserve) clear
rename (CD_GCMUN AR_MUN_2016) (cod_municipio mun_area)
keep cod_municipio mun_area
drop if cod_municipio==""

*only municipalities present on both sides are kept:
merge 1:1 cod_municipio using "$workingdata/mun_aux.dta", keep(match) nogenerate
save "$workingdata/mun_aux.dta", replace

*--- municipal GDP and value added ---
import delimited "$data/gdp.csv", delimiters(";") stringcols(_all) case(preserve) clear
drop if cod_municipio==""
merge 1:1 cod_municipio using "$workingdata/mun_aux.dta", keep(match) nogenerate
save "$workingdata/mun_aux.dta", replace

*--- shapefile attribute table (coordinates and geographic fundamentals) ---
import delimited "$data/mun_shapefile.csv", delimiters(";") stringcols(_all) case(preserve) clear
drop ID
rename CD_GEOCODM cod_municipio
drop NM_MUNICIP
destring xcoord ycoord alt_mean tri_mean coffee_mean maize_mean sugar_mean ///
	malaria_mean latosol acrisol terraroxa, replace
merge 1:1 cod_municipio using "$workingdata/mun_aux.dta", keep(match) nogenerate
save "$workingdata/mun_aux.dta", replace

*--- fold Estiva Gerbi into Mogi-Guacu ---
replace cod_municipio="3530706" if cod_municipio=="3557303"

*the remaining text columns must be numeric before they can be aggregated:
destring mun_area gdp total_va ag_va man_va serv_va gov_va, replace

*area-weighted means for the geographic variables, plain sums for the totals:
collapse (mean) xcoord ycoord alt_mean tri_mean coffee_mean maize_mean sugar_mean ///
	malaria_mean latosol acrisol terraroxa ///
	(rawsum) pop pop_urban pop2000 pop_urban2000 mun_area gdp total_va ag_va ///
	man_va serv_va gov_va [aw=mun_area], by(cod_municipio)

*--- railroad arrival data built above ---
merge 1:1 cod_municipio using "$workingdata/mun_data.dta", keep(match) nogenerate
save "$workingdata/mun_aux.dta", replace

*--- incorporation year ---
import delimited "$data/foundation_year.csv", delimiters(";") stringcols(_all) case(preserve) clear
destring found_year, replace
merge 1:1 cod_municipio using "$workingdata/mun_aux.dta", keep(match) nogenerate
save "$workingdata/mun_aux.dta", replace

*--- river dummy: municipalities listed in the rivers file get river equal to 1 ---
import delimited "$data/mun_rivers.csv", delimiters(";") stringcols(_all) case(preserve) clear
rename CD_GEOCODM cod_municipio
keep cod_municipio
gen river = 1

*rows found only in the rivers file are discarded; the rest get 0 when unmatched:
merge 1:1 cod_municipio using "$workingdata/mun_aux.dta", keep(match using) nogenerate
replace river = 0 if river!=1

*the temporary file is no longer needed:
erase "$workingdata/mun_aux.dta"

*store the municipality data:
compress
save "$workingdata/mun_data.dta", replace

***************************************** 
*PREPARE ATTRIBUTES OF NEXT MUNICIPALITY
*****************************************

*build a table in which each municipality appears as the follower of its
*previous municipality:
preserve
keep cod_municipio previous_mun dist_to_luz year mun_area
rename cod_municipio mun_next
rename previous_mun cod_municipio
rename dist_to_luz dist_to_luz_next
rename year year_next
save "$workingdata/next_mun.dta", replace
restore

*one row per (municipality, follower) pair; followers without a match are dropped:
merge 1:m cod_municipio using "$workingdata/next_mun.dta"
drop if _merge==2

*earliest arrival year among the followers of each municipality:
bysort cod_municipio: egen year_aux = min(year_next)

*keep an untouched copy of the follower's distance to Luz, then blank the
*working copy for followers reached after that earliest year:
gen dist_to_luz_next_aux = dist_to_luz_next
replace dist_to_luz_next = . if year_next>year_aux

*rank up to three followers by their numeric municipality code:
destring mun_next, replace

*lowest code:
gen mun_next1 = mun_next
bysort cod_municipio: egen aux = min(mun_next1)
replace mun_next1 = . if mun_next1>aux & mun_next1!=.
drop aux

*lowest code among the rest:
gen mun_next2 = mun_next if mun_next1==.
bysort cod_municipio: egen aux = min(mun_next2)
replace mun_next2 = . if mun_next2>aux & mun_next2!=.
drop aux

*whatever is left:
gen mun_next3 = mun_next if mun_next1==. & mun_next2==.

*distance to Luz of each ranked follower:
forvalues i = 1/3 {
	gen dist_to_luz_nextm`i' = dist_to_luz_next_aux if mun_next`i'==mun_next
}

*the unranked follower code is no longer needed:
drop mun_next

*back to one row per municipality:
collapse (mean) year_aux year dist_to_luz_next dist_to_luz ///
	(min) dist_to_luz_next_min=dist_to_luz_next_aux ///
	(firstnm) mun_next* dist_to_luz_nextm*, by(cod_municipio)

*follower codes go back to text:
tostring mun_next1 mun_next2 mun_next3, replace

*distance to the next town, measured in different ways:
gen dist_next = dist_to_luz_next - dist_to_luz
gen dist_next_min = dist_to_luz_next_min - dist_to_luz
forvalues i = 1/3 {
	gen dist_next_mun`i' = dist_to_luz_nextm`i' - dist_to_luz
}

*arrival year at the next municipality and years spent as endpoint:
gen year_next = year_aux
gen time_as_endpoint = year_aux - year

keep cod_municipio time_as_endpoint mun_next* dist_next dist_next_min dist_next_mun* year_next

*bring the rest of the municipality data back in, keeping all rows:
merge 1:1 cod_municipio using "$workingdata/mun_data.dta", nogenerate

*municipality codes used in the manual corrections below:
local agudos "3500709"
local pederneiras "3536703"
local bauru "3506003"

*Agudos: Bauru is a crossroad (EFS arrived in 1903, in Bauru only in 1905):
replace time_as_endpoint = 1905-1903 if cod_municipio=="`agudos'"
replace dist_next = 323.42892-304.2023 if cod_municipio=="`agudos'"
replace dist_next_min = dist_next if cod_municipio=="`agudos'"
replace dist_next_mun1 = dist_next if cod_municipio=="`agudos'"
replace mun_next1="`bauru'" if cod_municipio=="`agudos'"

*Pederneiras: Bauru is a crossroad (Paulista arrived in 1903, in Bauru only in 1910):
replace time_as_endpoint = 1910-1903 if cod_municipio=="`pederneiras'"
replace dist_next = 351.80771-321.97275 if cod_municipio=="`pederneiras'"
replace dist_next_min = dist_next if cod_municipio=="`pederneiras'"
replace dist_next_mun1 = dist_next if cod_municipio=="`pederneiras'"
replace mun_next1="`bauru'" if cod_municipio=="`pederneiras'"

*Araçatuba (a different route followed, but was abandoned):
replace time_as_endpoint=0 if cod_municipio=="3502804"

*Rio Claro (not an endpoint, only a small railway branch):
replace endpoint=0 if cod_municipio=="3543907"

*store the result:
compress
save "$workingdata/mun_data.dta", replace

*the follower table is no longer needed:
erase "$workingdata/next_mun.dta"

****************************
*ADD MICRO AND MESO REGIONS
****************************

*load the region table and keep only its code columns:
import delimited "$data/regions.csv", delimiters(";") stringcols(_all) case(preserve) clear
keep cod_*

*attach the municipality data, keeping matched municipalities only, and save:
merge 1:1 cod_municipio using "$workingdata/mun_data.dta", keep(match) nogenerate
save "$workingdata/mun_data.dta", replace

*******************************************************
*PREPARE DISTANCE TO THE NEXT INCORPORATED MUNICIPALITY
*******************************************************

*Purpose: for every municipality i, find the closest municipality further along
*the line (dist_nextfound), the closest one back along the line (dist_prfound),
*and the closest one further along that was incorporated at least ten years
*before the arrival year of i (dist_nextfound_10yr). Candidates must be on the
*same company and already incorporated by the arrival year of i.

*generate the variables of interest:
gen dist_nextfound = .
gen dist_prfound = .	
gen dist_nextfound_10yr = .

*generate an id:
egen id_aux = group(cod_municipio)
sort id_aux

*get the auxiliary numeric mun code and previous mun code:
destring cod_municipio, gen(cod_municipio_num)
destring previous_mun, gen(previous_mun_num)
forvalues k=1/3 {
destring mun_next`k', gen(mun_next`k'_num)
}

*and now we loop for the next and previous incorporated municipalities:
forvalues i=1/219 {

di " 	*** Mun.: `i'/219 	*** "

*characteristics of the municipality i:
*(each value of municipality i is spread to all rows as a constant)
quietly gen aux1 = year if id_aux==`i'
egen year_aux = mean(aux1)
quietly gen aux2 = dist_to_luz if id_aux==`i'
egen dist_to_luz_aux = mean(aux2)
quietly gen aux3 = company_nr if id_aux==`i'
egen company_nr_aux = mean(aux3)
drop aux*

	*generate the sets:
	*set_next collects municipalities reached from i by following the line
	*forward; set_pr collects those from which i is reached
	gen set_next = 0
	gen set_pr = 0

	*the set includes the own town:
	quietly replace set_next = 1 if id_aux==`i'
	quietly replace set_pr = 1 if id_aux==`i'
	
	*we make a large loop for the search:
	*(100 passes, so that membership can spread along the chain one link at a time)
	*NOTE(review): the macro k of this loop is reused by the inner loop over the
	*three next municipalities below; k is not otherwise read in this body.
	*Consider renaming one of the two.
	forvalues k=1/100 {
	
	*it also includes whether some previous town is in the set too:
	*NOTE(review): this loop stops at 218 while the outer loop over municipalities
	*runs to 219; if id_aux has 219 values, the last one is never added to either
	*set here. Confirm which bound is intended.
	forvalues j=1/218 {
	
	*create a constant variable with the indicator for the previous municipality:
	quietly gen aux4 = previous_mun_num if id_aux==`j'
	quietly egen previous_mun_aux = mean(aux4)
		drop aux4
		
	*and for the next municipalities:
	forvalues k=1/3{
	quietly gen aux4 = mun_next`k'_num if id_aux==`j'
	quietly egen mun_next`k'_aux = mean(aux4)
		drop aux4
	}
		
	*identify whether the previous municipality is in the set:
	gen aux5 = 0
	quietly replace aux5 = set_next if cod_municipio_num==previous_mun_aux
	quietly egen previous_set_next = max(aux5)
		drop aux5
		
	*and whether any of the next municipalities are in the set:
	gen aux5 = 0
	quietly replace aux5 = set_pr if cod_municipio_num==mun_next1_aux | cod_municipio_num==mun_next2_aux | cod_municipio_num==mun_next3_aux
	quietly egen next_set_pr = max(aux5)
		drop aux5	
	
	*and replace:
	*(municipality j joins set_next when its previous municipality is already in
	*it, and joins set_pr when one of its next municipalities is already in it)
	quietly replace set_next = 1 if previous_set_next==1 & id_aux==`j'
	quietly replace set_pr = 1 if next_set_pr==1 & id_aux==`j'
	
	*drop:
	drop previous_set_next previous_mun_aux next_set_pr mun_next*_aux 
	
	}
	}

*now find the distance we need:
*(smallest positive gap in distance to Luz among members of set_next that were
*incorporated by the arrival year of i, share its company, and were reached in
*the same year or later)
quietly gen dist_aux1 = dist_to_luz - dist_to_luz_aux if set_next==1 & found_year<=year_aux & company_nr==company_nr_aux & dist_to_luz>dist_to_luz_aux &year>=year_aux
quietly egen dist_aux2 = min(dist_aux1)

	*and for previous municipalities:
	*(same idea within set_pr, for towns closer to Luz and reached no later than i)
	quietly gen dist_aux3 = -(dist_to_luz - dist_to_luz_aux) if set_pr==1 & found_year<=year_aux & company_nr==company_nr_aux & dist_to_luz<dist_to_luz_aux &year<=year_aux
	quietly egen dist_aux4 = min(dist_aux3)
	
	*forward search again, restricted to towns incorporated at least ten years
	*before the arrival year of i:
	quietly gen dist_aux5 = dist_to_luz - dist_to_luz_aux if set_next==1 & found_year<=year_aux-10 & company_nr==company_nr_aux & dist_to_luz>dist_to_luz_aux &year>=year_aux
	quietly egen dist_aux6 = min(dist_aux5)

	*and replace:
	quietly replace dist_nextfound = dist_aux2 if id_aux==`i'
	quietly replace dist_prfound = dist_aux4 if id_aux==`i'
	quietly replace dist_nextfound_10yr = dist_aux6 if id_aux==`i'

*and now we drop:
drop year_aux dist_to_luz_aux company_nr_aux
drop set_next set_pr dist_aux*

}

*and we drop the auxiliary id variable:
drop id_aux cod_municipio_num previous_mun_num mun_next*_num

*update for Agudos and Pederneiras (Bauru was already incorporated):
replace dist_nextfound = dist_next if cod_municipio=="3500709"
replace dist_nextfound = dist_next if cod_municipio=="3536703"
replace dist_nextfound_10yr = dist_next if cod_municipio=="3500709"
replace dist_nextfound_10yr = dist_next if cod_municipio=="3536703"

save "$workingdata/mun_data.dta", replace

*******************************************
*MERGE WITH HISTORICAL NATIONAL GDP GROWTH
*******************************************

*load the national series and discard rows without a year:
import delimited "$data/historical_agggdp.csv", delimiters(";") case(preserve) clear
drop if year==.

*attach the series to every municipality by its arrival year; years that match
*no municipality are discarded:
merge 1:m year using "$workingdata/mun_data.dta", keep(match using) nogenerate

*store the result:
save "$workingdata/mun_data.dta", replace

********************
*LABELING VARIABLES
********************

*Purpose: attach variable labels to the municipality data, add a 6-digit numeric
*municipality code and a pre-existence dummy, and save. The value-added labels
*previously read "vaue added"; the spelling is corrected here.

*railroad arrival:
label variable year "Year of railroad arrival"
label variable year_last "Year of railroad arrival (last)"
label variable endpoint "Endpoint municipality, current"
label variable dist_to_luz "Distance to Luz, through rail"
label variable dist_to_luz_last "Distance to Luz (last station)"
label variable cod_municipio "Municipality code, IBGE"
label variable company_nr "Company, numeric"
label variable company_name "Company"

*neighbours on the line:
label variable previous_mun "Previous municipality"
forvalues i = 1/3 {
	label variable mun_next`i' "Next municipality, `i'"
	label variable dist_next_mun`i' "Distance to next municipality `i'"
}
label variable time_as_endpoint "Years as endpoint"
label variable year_next "Year of arrival to next municipality"
label variable dist_next "Distance to next (first built) station"
label variable dist_next_min "Distance to next (closest) station"

*size, incorporation, and location:
label variable mun_area "Municipality area (sq. km)"
label variable pop "Population, 2010"
label variable pop_urban "Urban population, 2010"
label variable found_year "Year of incorporation"
label variable xcoord "Longitude"
label variable ycoord "Latitude"
label variable xcoord_rail "Longitude of first rail station"
label variable ycoord_rail "Latitude of first rail station"

*geographic fundamentals:
label variable alt_mean "Altitude (mean)"
label variable tri_mean "TRI (mean)"
label variable coffee_mean "Potential yield of coffee"
label variable maize_mean "Potential yield of maize"
label variable sugar_mean "Potential yield of sugar"
label variable malaria_mean "Days a year that support malaria transmission"
label variable river "Main river dummy"
label variable latosol "Share of latosols"
label variable acrisol "Share of acrisols"
label variable terraroxa "Share of terra roxa"

label variable pop2000 "Population, 2000"
label variable pop_urban2000 "Urban population, 2000"

*national series merged by arrival year:
label variable gdp_growth "GDP growth"
label variable pcgdp_growth "Per capita GDP growth"
label variable pop_growth "Population growth"

label variable cod_rgi "Micro-region code (Regiao Imediata)"
label variable cod_rgint "Meso-region code (Regiao Intermediaria)"

label variable dist_nextfound "Distance to next incorporated city on the company line (km)"
label variable dist_prfound "Distance to previous incorporated city on the company line (km)"

label variable dist_nextfound_10yr "Distance to next incorporated city on the company line (km), 10 year lag"

*municipal accounts:
label variable gdp "Total municipal GDP, 1,000 R$s, 2010"
label variable total_va "Total municipal value added,  1,000 R$s, 2010"
label variable ag_va "Agriculture municipal value added,  1,000 R$s, 2010"
label variable man_va "Manufacturing municipal value added,  1,000 R$s, 2010"
label variable serv_va "Services municipal value added,  1,000 R$s, 2010"
label variable gov_va "Government municipal value added,  1,000 R$s, 2010"

*generate 6 digit numeric IBGE code to merge with census data
*(the 7-digit code is made numeric and its last digit is dropped):
gen cod_municipio_6dt = cod_municipio
destring cod_municipio_6dt, replace
replace cod_municipio_6dt = floor(cod_municipio_6dt/10)
label variable cod_municipio_6dt "Municipality code, IBGE, 6 digit"

*generate a dummy if founded before the arrival year (0 when either year is missing):
gen preexistent_mun = 0
replace preexistent_mun = 1 if found_year<=year & year!=. & found_year!=.
label variable preexistent_mun "Incorporated before railroad arrival"

*saving:
compress
save "$workingdata/mun_data.dta", replace
describe

***********************************************
*PREPARE DISTANCE TO THE PREVIOUS MUNICIPALITY
***********************************************

*turn the data in memory into a lookup table keyed by previous_mun, holding each
*municipality's next municipalities and the distances to them:
keep cod_municipio dist_next_mun* mun_next* time_as_endpoint
rename cod_municipio previous_mun
forvalues i = 1/3 {
	rename dist_next_mun`i' dist_aux_mun`i'
	rename mun_next`i' mun_aux`i'
}

*the previous municipality's own years as endpoint:
rename time_as_endpoint time_as_endpoint_previous
label variable time_as_endpoint_previous "Years as endpoint (previous mun.)"

*attach the full municipality data; lookup rows that match no municipality are dropped:
merge 1:m previous_mun using "$workingdata/mun_data.dta", keep(match using) nogenerate

*take the distance from whichever slot of the previous municipality points to
*this municipality (a later slot overrides an earlier one):
gen dist_previous = .
forvalues i = 1/3 {
	replace dist_previous = dist_aux_mun`i' if mun_aux`i'==cod_municipio
}
label variable dist_previous "Distance to previous municipality"

*the lookup columns are no longer needed:
drop dist_aux_mun* mun_aux*

*store the result:
compress
save "$workingdata/mun_data.dta", replace

**********************************************************
*PREPARE LINEAR DISTANCES TO NEXT AND LAST MUNICIPALITIES
**********************************************************

*coordinate lookup table, built from the data in memory:
keep cod_municipio xcoord_rail ycoord_rail year

*São Paulo gets the coordinates of Estação da Luz:
replace xcoord_rail=-46.6348909 if cod_municipio=="3550308"
replace ycoord_rail=-23.535186 if cod_municipio=="3550308"

rename (xcoord_rail ycoord_rail year) (long_rail lat_rail year_arrival_next)

*the same code is stored under each key name that will be used for merging:
foreach key in mun_next1 mun_next2 mun_next3 previous_mun {
	gen `key'=cod_municipio
}
drop cod_municipio

save "$workingdata/coordinates_aux.dta", replace

*back to the main data:
use "$workingdata/mun_data.dta", clear

*coordinates of the previous municipality, then the straight-line distance to it:
merge m:1 previous_mun using "$workingdata/coordinates_aux.dta", ///
	keepusing(lat_rail long_rail) keep(master match) nogenerate
geodist ycoord_rail xcoord_rail lat_rail long_rail, gen(dist_previous_linear)
label variable dist_previous_linear "Distance previous municipality (linear, between rail stations)"
drop lat_rail long_rail

*same for each of the three next municipalities; a distance is blanked when that
*municipality was reached after year_next:
forvalues k = 1/3 {
	merge m:1 mun_next`k' using "$workingdata/coordinates_aux.dta", ///
		keepusing(lat_rail long_rail year_arrival_next) keep(master match) nogenerate
	geodist ycoord_rail xcoord_rail lat_rail long_rail, gen(dist_next`k'_linear)
	replace dist_next`k'_linear = . if year_next<year_arrival_next
	drop lat_rail long_rail year_arrival_next
}

*average over the distances that remain:
egen dist_next_linear = rowmean(dist_next*_linear)
drop dist_next1_linear dist_next2_linear dist_next3_linear
label variable dist_next_linear "Distance to next municipality (linear, between rail stations)"

*Pederneiras: use the coordinates of Aimores station as the first station of Bauru:
gen aux_xcoord = -48.9998600
gen aux_ycoord = -22.3030109
geodist ycoord_rail xcoord_rail aux_ycoord aux_xcoord, gen(aux_distance)
replace dist_next_linear = aux_distance if cod_municipio=="3536703"
drop aux_xcoord aux_ycoord aux_distance

*store the result:
compress
save "$workingdata/mun_data.dta", replace

*the coordinate lookup table is no longer needed:
erase "$workingdata/coordinates_aux.dta"

********************************************************************************
*3) PREPARE DATA AT THE AMC LEVEL, FOR PREVIOUS CENSUS YEARS
********************************************************************************

*******************
*PREPARE AMC CODES
*******************

*load the municipality-to-AMC crosswalk:
import delimited "$data/IPEADATA_Municipios_X_AMCs.csv", delimiters(";") case(preserve) clear

*state abbreviation, filled in below:
gen estado=""

*keep and rename the columns used later:
keep UFMUNDV_CHAR UFMUN NOMEMUN NEW_CODE_1991_1997 NEW_CODE_1970_1997 estado
rename (NEW_CODE_1991_1997 NEW_CODE_1970_1997 NOMEMUN) (amc1991 amc1970 mun)

*municipality names: trimmed and lower case:
replace mun = strlower(strtrim(mun))

*the first word of UFMUNDV_CHAR is kept as the state code:
split UFMUNDV_CHAR
drop UFMUNDV_CHAR2
rename UFMUNDV_CHAR1 estado_code

*state abbreviation for the three states of interest (empty otherwise):
replace estado = cond(estado_code=="31", "mg", ///
	cond(estado_code=="35", "sp", ///
	cond(estado_code=="41", "pr", estado)))

*name corrections: Altonia/PR, and Vila Alta renamed to Alto Paraiso:
replace mun="altonia" if mun=="altania"
replace mun="alto paraiso" if mun=="vila alta"

*only Parana, Sao Paulo, and Minas Gerais are kept:
keep if inlist(estado, "pr", "sp", "mg")

save "$workingdata/amc.dta", replace


********************************************
*PREPARE 1970 AMC DATA (FOR 1970, 1980 POP)
********************************************

*--- population, all census years ---
import delimited "$data/pop_mun.csv", delimiters(";") stringcols(_all) case(preserve) clear
keep cod_municipio pop1970 pop_urban1970 pop1980 pop_urban1980 pop1991 pop_urban1991 ///
	pop2000 pop_urban2000 pop2010 pop_urban2010
destring pop1970 pop_urban1970 pop1980 pop_urban1980 pop1991 pop_urban1991 ///
	pop2000 pop_urban2000 pop2010 pop_urban2010, replace
save "$workingdata/amc_aux.dta", replace

*--- incorporation year; rows found only in the foundation file are dropped ---
import delimited "$data/foundation_year.csv", delimiters(";") stringcols(_all) case(preserve) clear
merge 1:1 cod_municipio using "$workingdata/amc_aux.dta", keep(match using) nogenerate
save "$workingdata/amc_aux.dta", replace

*--- municipality area; matched municipalities only ---
import delimited "$data/mun_area.csv", delimiters(";") stringcols(_all) case(preserve) clear
rename (CD_GCMUN AR_MUN_2016) (cod_municipio mun_area)
keep cod_municipio mun_area
drop if cod_municipio==""
merge 1:1 cod_municipio using "$workingdata/amc_aux.dta", keep(match) nogenerate

*make the remaining text columns numeric before saving:
destring found_year mun_area, replace
save "$workingdata/amc_aux.dta", replace

*--- coordinates of SP, MG, and PR municipalities ---
import delimited "$data/mun_coordinates.csv", delimiters(";") stringcols(_all) case(preserve) clear
drop ID
rename CD_GEOCODM cod_municipio
keep cod_municipio xcoord ycoord
destring xcoord ycoord, replace
merge 1:1 cod_municipio using "$workingdata/amc_aux.dta", keep(match) nogenerate

*--- attach the AMC code through the 6-digit municipality code ---
destring cod_municipio, gen(UFMUN)
replace UFMUN = floor(UFMUN/10)
merge m:1 UFMUN using "$workingdata/amc.dta"
keep if _merge==3

*the 1970 definition is the AMC used from here on:
rename amc1970 amc

*Guatapara is treated as its own unit (part of Ribeirão Preto, but on the
*Paulista instead of the Mogiana):
replace amc="guatapara" if mun=="guatapara"

*--- one row per AMC: sums for area and population, earliest incorporation
*year, area-weighted mean coordinates ---
collapse (rawsum) mun_area pop* (min) found_year (mean) xcoord ycoord [aw=mun_area], by(amc)
save "$workingdata/amc_aux.dta", replace

*start from the station-level railroad data:
use "$workingdata/rail_stations.dta", clear

	*treat Estiva Gerbi as part of Mogi-Guacu:
	replace cod_municipio="3530706" if cod_municipio=="3557303"

*link each station to its AMC, keeping only the stations that find one:
destring cod_municipio, gen(UFMUN)
replace UFMUN = floor(UFMUN/10)
merge m:1 UFMUN using "$workingdata/amc.dta", keep(match) nogenerate

	*the 1970 AMC is the unit used here:
	rename amc1970 amc
	*Guatapara gets its own unit (part of Ribeirão Preto, but on the Paulista instead of the Mogiana):
	replace amc="guatapara" if mun=="guatapara"
	*variables no longer needed:
	drop mun estado_code amc1991

*AMC of the station this one was reached from (from_nr is used as an observation number after sorting by id_nr):
sort id_nr
gen previous_amc = amc[from_nr] if from_nr!=.

	*leave it empty when the previous station lies in the same AMC:
	replace previous_amc="" if previous_amc==amc
	
*give every AMC a numeric code; the number of AMCs is read from the data
*(r(max) of the summarize below) instead of being hard-coded, so no AMC can
*be skipped if the station or AMC files change:
egen amc_id = group(amc)
sum amc_id
local n_amc = r(max)

*flag the stations through which the railroad first entered each AMC:
*a station is flagged when it was reached from a different AMC (previous_amc
*is not empty) in the earliest year recorded for its AMC.
gen first_station_amc = 0
forvalues o=1/`n_amc' {

*identify the year of first arrival (aux2 is that year, constant over all rows):
quietly gen aux1 = year if amc_id==`o'
quietly egen aux2 = min(aux1)
quietly replace first_station_amc = 1 if amc_id==`o' & previous_amc!="" & year==aux2
drop aux1 aux2
}

	*exclude Mato Seco:
	replace first_station_amc = 0 if id=="306"
	*keep the company name only on first-arrival stations, so the (firstnm) below picks it up from them:
	replace company_name="" if first_station_amc==0	
	

*collapse at the AMC level: earliest year and shortest distance, plus the
*latest year, the largest distance, and whether any station is an endpoint:
collapse (min) year dist_to_luz ///
	(max) endpoint year_last=year dist_to_luz_last=dist_to_luz ///
	(firstnm) previous_amc company_nr company_name ///
	, by(amc)
	
*and save:
save "$workingdata/amc1970_data.dta", replace	

*For each AMC, find the AMCs the railroad reached from it, and derive how long
*it took for the line to move on (time_as_endpoint) and the rail distance to
*the AMC reached first (dist_next).

*save the next AMCs: the current data are re-keyed so that each row describes
*an AMC (amc_next) and is identified by the AMC it was reached from (amc):
preserve
keep amc previous_amc dist_to_luz year 
rename amc amc_next
rename previous_amc amc
rename dist_to_luz dist_to_luz_next
rename year year_next
save "$workingdata/next_amc.dta", replace

*and now we merge the municipalities with those that follow it:
restore
merge 1:m amc using "$workingdata/next_amc.dta"
	*drop the next-AMC records whose previous AMC is not in the AMC-level data
	*(original note: São Paulo city):
	drop if _m==2

*generate the least year, i.e. the earliest arrival among the AMCs that follow:
bysort amc: egen year_aux = min(year_next)
*keep the distance only for the AMC(s) reached in that earliest year:
replace dist_to_luz_next = . if year_next>year_aux

*generate up to four following AMCs, ordered by their AMC code:
*NOTE(review): rank() is used without options, so amc_next`i' is only filled
*when the following AMCs of an AMC have distinct ranks — presumably always the
*case because the data hold one row per AMC; confirm.
egen aux = group(amc_next)
bysort amc: egen rank = rank(aux)
drop aux
	foreach i in 1 2 3 4 {
	gen amc_next`i'= amc_next if rank==`i'
	}
	*drop the mun next:
	drop amc_next

*and collapse back to one row per AMC:
collapse (mean) year_aux year dist_to_luz_next dist_to_luz (firstnm) amc_next* , by(amc)

*and then we now generate time as endpoint (years until the first following AMC
*was reached) and the rail distance to it:
gen time_as_endpoint = year_aux - year
gen dist_next = dist_to_luz_next - dist_to_luz

keep amc time_as_endpoint dist_to_luz amc_next* dist_next

*add these variables to the AMC-level railroad data saved earlier:
merge 1:1 amc using "$workingdata/amc1970_data.dta"
drop _m

*Manual corrections to the automatically derived endpoint variables, followed
*by the merge with the AMC-level population, area, and coordinate data.

*update for Agudos (EFS arrived in 1903, in Bauru only in 1905):
replace time_as_endpoint = 1905-1903 if amc=="35AMC7097002"
replace dist_next = 323.42892-304.2023 if amc=="35AMC7097002"
replace amc_next1 = "350600" if amc=="35AMC7097002"

*update for Pederneiras (Paulista arrived in 1903, in Bauru only in 1910):
replace time_as_endpoint = 1910-1903 if amc=="353670"
replace dist_next = 351.80771-321.97275 if amc=="353670"
replace amc_next1="350600" if amc=="353670"

*update for Araçatuba (where a different route followed in the Noroeste):
replace time_as_endpoint=0 if amc=="35AMC7097007"

*change Rio Claro (it has a small branch, but it is not an endpoint):
replace endpoint=0 if amc=="354390"

*drop Guataparã and Ribeirão Preto, which were in the same municipality in 1970:
drop if amc=="guatapara" | amc=="35AMC7097071"

*saving:
save "$workingdata/amc1970_data.dta", replace	

*merge with the AMC data, keeping only AMCs present in both files
*(forward slash in the path, as everywhere else in this file, so that the
*do-file also runs on macOS and Linux):
merge 1:1 amc using "$workingdata/amc_aux.dta"
keep if _m==3
drop _m

*variable labels, railroad variables:
label variable year "Year of railroad arrival"
label variable year_last "Year of railroad arrival (last)"
label variable endpoint "Endpoint municipality, current"
label variable dist_to_luz "Distance to Luz, through rail"
label variable dist_to_luz_last "Distance to Luz (last station)"

*population of each census year:
foreach y of numlist 1970 1980 1991 2000 2010 {
	label variable pop`y' "Population, `y'"
	label variable pop_urban`y' "Urban population, `y'"
}

*railroad company:
label variable company_nr "Company, numeric"
label variable company_name "Company"

*AMC identifiers and neighbours along the line:
label variable amc "AMC (1970-.)"
label variable previous_amc "Previous AMC"
forvalues i = 1/4 {
	label variable amc_next`i' "Next AMC, `i'"
}

*endpoint measures and municipality attributes:
label variable time_as_endpoint "Years as endpoint"
label variable dist_next "Distance to next (first built) station"
label variable mun_area "Municipality area (sq. km)"
label variable found_year "Year of incorporation"
label variable xcoord "Longitude"
label variable ycoord "Latitude"

*São Paulo is the unit at distance zero from Luz; remove it:
drop if dist_to_luz==0

*store the final 1970 AMC file:
compress
save "$workingdata/amc1970_data.dta", replace	

**********************************
*PREPARE 1991 AMC DATA (1991 POP)
**********************************

*load the municipality population file:
import delimited "$data/pop_mun.csv", delim(";") stringcols(_all) case(preserve) clear
keep cod_municipio pop1991 pop_urban1991 pop2000 pop_urban2000 pop2010 pop_urban2010

*convert the total and urban population of 1991, 2000, and 2010 to numeric:
destring pop1991 pop_urban1991 pop2000 pop_urban2000 pop2010 pop_urban2010, replace

	*store as the auxiliary municipality file:
	save "$workingdata/amc_aux.dta", replace

*bring in the incorporation year of each municipality:
import delimited "$data/foundation_year.csv", delim(";") stringcols(_all) case(preserve) clear

	*attach the population file, discarding municipalities found only in the incorporation-year file:
	merge 1:1 cod_municipio using "$workingdata/amc_aux.dta", keep(match using) nogenerate
	save "$workingdata/amc_aux.dta", replace

*bring in the municipality areas:
import delimited "$data/mun_area.csv", delim(";") stringcols(_all) case(preserve) clear

	*keep the code and the area under the names used in this project:
	rename (CD_GCMUN AR_MUN_2016) (cod_municipio mun_area)
	keep cod_municipio mun_area
	keep if cod_municipio!=""

	*retain only the municipalities present in both files:
	merge 1:1 cod_municipio using "$workingdata/amc_aux.dta", keep(match) nogenerate

	*make the incorporation year and the area numeric, then store:
	destring found_year mun_area, replace
	save "$workingdata/amc_aux.dta", replace

*load the attribute table of SP, MG, and PR municipalities, which carries the coordinates:
import delimited "$data/mun_coordinates.csv", delim(";") stringcols(_all) case(preserve) clear

	*keep the municipality code and its coordinates, as numbers:
	drop ID
	rename CD_GEOCODM cod_municipio
	keep cod_municipio xcoord ycoord
	destring xcoord ycoord, replace

	*attach the auxiliary file, keeping only municipalities found in both:
	merge 1:1 cod_municipio using "$workingdata/amc_aux.dta", keep(match) nogenerate

*link each municipality to its AMC (the merge key is the municipality code divided by 10 and rounded down):
destring cod_municipio, gen(UFMUN)
replace UFMUN = floor(UFMUN/10)
merge m:1 UFMUN using "$workingdata/amc.dta", keep(match)

	*the 1991 AMC is the unit used here:
	rename amc1991 amc
	*Guatapara gets its own unit (part of Ribeirão Preto, but on the Paulista instead of the Mogiana):
	replace amc="guatapara" if mun=="guatapara"

*aggregate to the AMC: totals for area and population, earliest incorporation year, area-weighted mean coordinates:
collapse (rawsum) mun_area pop* ///
	(min) found_year ///
	(mean) xcoord ycoord ///
	[aw=mun_area], by(amc)
	save "$workingdata/amc_aux.dta", replace

*start from the station-level railroad data:
use "$workingdata/rail_stations.dta", clear

	*treat Estiva Gerbi as part of Mogi-Guacu:
	replace cod_municipio="3530706" if cod_municipio=="3557303"

*link each station to its AMC, keeping only the stations that find one:
destring cod_municipio, gen(UFMUN)
replace UFMUN = floor(UFMUN/10)
merge m:1 UFMUN using "$workingdata/amc.dta", keep(match) nogenerate

	*the 1991 AMC is the unit used here:
	rename amc1991 amc
	*Guatapara gets its own unit (part of Ribeirão Preto, but on the Paulista instead of the Mogiana):
	replace amc="guatapara" if mun=="guatapara"
	*variables no longer needed:
	drop mun estado_code amc1970

*AMC of the station this one was reached from (from_nr is used as an observation number after sorting by id_nr):
sort id_nr
gen previous_amc = amc[from_nr] if from_nr!=.

	*leave it empty when the previous station lies in the same AMC:
	replace previous_amc="" if previous_amc==amc

*give every AMC a numeric code; the number of AMCs is read from the data
*(r(max) of the summarize below) instead of being hard-coded, so no AMC can
*be skipped if the station or AMC files change:
egen amc_id = group(amc)
sum amc_id
local n_amc = r(max)

*flag the stations through which the railroad first entered each AMC:
*a station is flagged when it was reached from a different AMC (previous_amc
*is not empty) in the earliest year recorded for its AMC.
gen first_station_amc = 0
forvalues o=1/`n_amc' {

*identify the year of first arrival (aux2 is that year, constant over all rows):
quietly gen aux1 = year if amc_id==`o'
quietly egen aux2 = min(aux1)
quietly replace first_station_amc = 1 if amc_id==`o' & previous_amc!="" & year==aux2
drop aux1 aux2
}

	*exclude Mato Seco:
	replace first_station_amc = 0 if id=="306"
	*keep the company name only on first-arrival stations, so the (firstnm) below picks it up from them:
	replace company_name="" if first_station_amc==0	
	
*collapse at AMC: earliest year and shortest distance, plus the latest year,
*the largest distance, and whether any station is an endpoint:
collapse (min) year dist_to_luz ///
	(max) endpoint year_last=year dist_to_luz_last=dist_to_luz ///
	(firstnm) previous_amc company_nr company_name ///
	, by(amc)
	
*and save:
save "$workingdata/amc1991_data.dta", replace	

*For each AMC, find the AMCs the railroad reached from it, and derive how long
*it took for the line to move on (time_as_endpoint) and the rail distance to
*the AMC reached first (dist_next).

*save the next AMCs: the current data are re-keyed so that each row describes
*an AMC (amc_next) and is identified by the AMC it was reached from (amc):
preserve
keep amc previous_amc dist_to_luz year 
rename amc amc_next
rename previous_amc amc
rename dist_to_luz dist_to_luz_next
rename year year_next
save "$workingdata/next_amc.dta", replace

*and now we merge the municipalities with those that follow it:
restore
merge 1:m amc using "$workingdata/next_amc.dta"
	*drop the next-AMC records whose previous AMC is not in the AMC-level data
	*(original note: São Paulo city):
	drop if _m==2

*generate the least year, i.e. the earliest arrival among the AMCs that follow:
bysort amc: egen year_aux = min(year_next)
*keep the distance only for the AMC(s) reached in that earliest year:
replace dist_to_luz_next = . if year_next>year_aux

*generate up to four following AMCs, ordered by their AMC code:
*NOTE(review): rank() is used without options, so amc_next`i' is only filled
*when the following AMCs of an AMC have distinct ranks — presumably always the
*case because the data hold one row per AMC; confirm.
egen aux = group(amc_next)
bysort amc: egen rank = rank(aux)
drop aux
	foreach i in 1 2 3 4 {
	gen amc_next`i'= amc_next if rank==`i'
	}
	*drop the mun next:
	drop amc_next

*and collapse back to one row per AMC:
collapse (mean) year_aux year dist_to_luz_next dist_to_luz (firstnm) amc_next* , by(amc)

*and then we now generate the variables of interest: years until the first
*following AMC was reached, and the rail distance to it:
gen time_as_endpoint = year_aux - year
gen dist_next = dist_to_luz_next - dist_to_luz

keep amc time_as_endpoint dist_to_luz amc_next* dist_next

*add these variables to the AMC-level railroad data saved earlier:
merge 1:1 amc using "$workingdata/amc1991_data.dta"
drop _m

*Manual corrections to the automatically derived endpoint variables, followed
*by the merge with the AMC-level population, area, and coordinate data.

*update for Agudos (EFS arrived in 1903, in Bauru only in 1905):
replace time_as_endpoint = 1905-1903 if amc=="35AMC9197002"
replace dist_next = 323.42892-304.2023 if amc=="35AMC9197002"
replace amc_next1 = "350600" if amc=="35AMC9197002"

*update for Pederneiras (Paulista arrived in 1903, in Bauru only in 1910):
replace time_as_endpoint = 1910-1903 if amc=="353670"
replace dist_next = 351.80771-321.97275 if amc=="353670"
replace amc_next1="350600" if amc=="353670"

*NOTE(review): the three codes used below start with "35AMC60", whereas the
*Agudos code above starts with "35AMC91" and the matching statements of the
*1970 AMC block use "35AMC70" codes (and the plain code "354390" for Rio Claro).
*If these values do not occur in amc1991, the three statements below change
*nothing — confirm the codes against the amc1991 variable of amc.dta.

*change for Araçatuba (where a different route followed in the Noroeste):
replace time_as_endpoint=0 if amc=="35AMC6097007"

*and change Rio Claro (only a small branch, but not an endpoint):
replace endpoint=0 if amc=="35AMC6097072"

*drop Guataparã and Ribeirão Preto, which were in the same municipality in 1991:
drop if amc=="guatapara" | amc=="35AMC6097071"

*saving:
save "$workingdata/amc1991_data.dta", replace	

*merge with AMC data, keeping only AMCs present in both files:
merge 1:1 amc using "$workingdata/amc_aux.dta"
keep if _m==3
drop _m

*variable labels, railroad variables:
label variable year "Year of railroad arrival"
label variable year_last "Year of railroad arrival (last)"
label variable endpoint "Endpoint municipality, current"
label variable dist_to_luz "Distance to Luz, through rail"
label variable dist_to_luz_last "Distance to Luz (last station)"

*population of each census year:
foreach y of numlist 1991 2000 2010 {
	label variable pop`y' "Population, `y'"
	label variable pop_urban`y' "Urban population, `y'"
}

*railroad company:
label variable company_nr "Company, numeric"
label variable company_name "Company"

*AMC identifiers and neighbours along the line:
label variable amc "AMC (1991-.)"
label variable previous_amc "Previous AMC"
forvalues i = 1/4 {
	label variable amc_next`i' "Next AMC, `i'"
}

*endpoint measures and municipality attributes:
label variable time_as_endpoint "Years as endpoint"
label variable dist_next "Distance to next (first built) station"
label variable mun_area "Municipality area (sq. km)"
label variable found_year "Year of incorporation"
label variable xcoord "Longitude"
label variable ycoord "Latitude"

*São Paulo is the unit at distance zero from Luz; remove it:
drop if dist_to_luz==0

*store the final 1991 AMC file and delete the two auxiliary files:
compress
save "$workingdata/amc1991_data.dta", replace	
erase "$workingdata/amc_aux.dta"
erase "$workingdata/next_amc.dta"

********************************************************************************
*4) PREPARE DATA AT THE DISTRICT LEVEL
********************************************************************************

********************************
*MERGE CROSSWALK AND RAIL DATA
*******************************

*load the crosswalk from railroad stations to districts:
import delimited "$data/station_to_district.csv", delim(";") clear	

*district codes are handled as strings:
tostring cod_distrito, replace

*attach the station-level railroad data (the station number is the key):
rename number id
merge 1:1 id using "$workingdata/rail_stations.dta", nogenerate
	
*treat Estiva Gerbi as part of Mogi-Guacu:
replace cod_distrito="353070605" if cod_distrito=="355730305"	
	
******************************
*DEFINE THE PREVIOUS DISTRICT
******************************

*district of the station this one was reached from (from_nr is used as an observation number after sorting by id_nr):
sort id_nr
gen previous_dist = cod_distrito[from_nr] if from_nr!=.

*leave it empty when the previous station lies in the same district:
replace previous_dist="" if previous_dist==cod_distrito

********************************************
*DEFINE VARIABLES OF ONLY THE FIRST ARRIVAL
********************************************

*give every district a numeric code; the number of districts is read from the
*data instead of being hard-coded (the previous fixed bound of 250 was below
*the 251 districts mentioned in the original comment, so the last district
*could never be flagged):
egen dist_id = group(cod_distrito)
quietly summarize dist_id
local n_dist = r(max)

*flag the stations through which the railroad first entered each district:
*a station is flagged when it was reached from a different district
*(previous_dist is not empty) in the earliest year recorded for its district.
gen first_station_dist = 0
forvalues o=1/`n_dist' {

*identify the year of first arrival (aux2 is that year, constant over all rows):
quietly gen aux1 = year if dist_id==`o'
quietly egen aux2 = min(aux1)
quietly replace first_station_dist = 1 if dist_id==`o' & previous_dist!="" & year==aux2
drop aux1 aux2
}

	*keep the company name only on first-arrival stations:
	replace company_name="" if first_station_dist==0
	
********************************
*COLLAPSE AT THE DISTRICT LEVEL
********************************

*coordinates of the first-arrival station only (missing for all other stations):
foreach c in xcoord ycoord {
	gen `c'_rail = `c' if first_station_dist==1
}

*one row per district: earliest year and shortest distance, latest year and
*largest distance, endpoint status, and the first non-missing identifiers:
collapse ///
	(min) year dist_to_luz ///
	(max) endpoint year_last=year dist_to_luz_last=dist_to_luz ///
	(firstnm) previous_dist company_nr company_name xcoord_rail ycoord_rail ///
	, by(cod_distrito)
	
*store the district-level railroad data:
save "$workingdata/dist_data.dta", replace		

************************************************
*ADD POPULATION, AREA, AND CENTROID COORDINATES
************************************************

*load the district population file:
import delimited "$data/pop_district.csv", delim(";") case(preserve) stringcols(_all) clear	
drop name

	*total and urban population as numbers (a dash in the urban column is read as zero):
	rename pop_total pop
	replace pop_urban="0" if pop_urban=="-"
	destring pop pop_urban, replace
	
	*store as the auxiliary district file:
	save "$workingdata/dist_aux.dta", replace
	
*load the district coordinates:
import delimited "$data/coord_district.csv", delim(";") clear

	*add them to the auxiliary file, keeping every district of either file:
	tostring cod_distrito, replace
	merge 1:1 cod_distrito using "$workingdata/dist_aux.dta", nogenerate
	save "$workingdata/dist_aux.dta", replace
	
*load the district areas:
import delimited "$data/area_district.csv", delim(";") clear

	*add them to the auxiliary file, keeping every district of either file:
	tostring cod_distrito, replace
	merge 1:1 cod_distrito using "$workingdata/dist_aux.dta", nogenerate
	save "$workingdata/dist_aux.dta", replace
	
*fold Estiva Gerbi into Mogi-Guacu:
replace cod_distrito="353070605" if cod_distrito=="355730305"	

	*totals for area and population, area-weighted mean coordinates:
	collapse (mean) xcoord ycoord ///
		(rawsum) area pop pop_urban ///
		[aw=area], by(cod_distrito)
	
	*name the area after its unit:
	rename area dist_area
	
*attach the district-level railroad data, keeping only districts present in both:
merge 1:1 cod_distrito using "$workingdata/dist_data.dta", keep(match) nogenerate

	*store:
	save "$workingdata/dist_data.dta", replace	
	
*********************************
*GET ATTRIBUTES OF NEXT DISTRICT
*********************************

*For each district, find the districts the railroad reached from it, and
*derive the year the line moved on, the years spent as the end of the line,
*and the rail distances to the following districts.

*save the next districts: the current data are re-keyed so that each row
*describes a district (district_next) and is identified by the district it
*was reached from (cod_distrito):
preserve
keep cod_distrito previous_dist dist_to_luz year
rename cod_distrito district_next
rename previous_dist cod_distrito 
rename dist_to_luz dist_to_luz_next
rename year year_next
save "$workingdata/next_dist.dta", replace	

*now merge the districts with those that follow it:
restore
merge 1:m cod_distrito using "$workingdata/next_dist.dta"
*discard next-district records whose previous district is not in the district data:
drop if _m==2

*generate the least year, i.e. the earliest arrival among the districts that follow:
bysort cod_distrito: egen year_aux = min(year_next)

*save an auxiliary value for distance to Luz (kept for every following district):
gen  dist_to_luz_next_aux = dist_to_luz_next

*use the least year to change some variables: the distance is kept only for
*the district(s) reached in the earliest year:
replace dist_to_luz_next = . if year_next>year_aux
	
*generate the three following districts, ordered by district code:
destring district_next, gen(aux)
bysort cod_distrito: egen aux2=rank(aux), track
	*display how many districts have a first, second, third, ... follower:
	tab aux2
foreach i in 1 2 3 {
gen district_next`i' = district_next if aux2==`i'
}
drop aux aux2
	
*now we may generate the distance to each of these:
foreach i in 1 2 3 {
gen dist_to_luz_nextd`i' = dist_to_luz_next_aux if district_next`i'==district_next
}

*drop the mun next:
drop district_next

*and collapse back to one row per district:
collapse (mean) year_aux year dist_to_luz_next dist_to_luz ///
(min) dist_to_luz_next_min=dist_to_luz_next_aux ///
(firstnm) district_next* dist_to_luz_nextd*, by(cod_distrito)

*generate the distance to next district (first reached, and each of the three):
gen dist_next = dist_to_luz_next - dist_to_luz
foreach i in 1 2 3 {
gen dist_next_dist`i' = dist_to_luz_nextd`i' - dist_to_luz
}

*generate time as endpoint (years until the first following district was reached):
gen year_next = year_aux
gen time_as_endpoint = year_aux - year

keep cod_distrito time_as_endpoint year_next district_next* dist_next dist_next_dist*

*add these variables to the district data saved earlier:
merge 1:1 cod_distrito using "$workingdata/dist_data.dta"
drop _m

*districts that receive a manual correction:
local agudos "350070905"
local guaianas "353670310"
local aracatuba "350280405"
local rio_claro "354390705"

*Agudos: the EFS arrived in 1903 and reached Bauru only in 1905:
replace time_as_endpoint = 1905-1903 if cod_distrito=="`agudos'"
replace dist_next = 323.42892-304.2023 if cod_distrito=="`agudos'"
replace district_next1="3506003" if cod_distrito=="`agudos'"

*Pederneiras, Guaianas district: the Paulista arrived in 1910 and reached Bauru in 1910:
replace time_as_endpoint = 1910-1910 if cod_distrito=="`guaianas'"
replace dist_next = 351.80771-328.96741 if cod_distrito=="`guaianas'"
replace district_next1="3506003" if cod_distrito=="`guaianas'"

*Araçatuba: a different route followed:
replace time_as_endpoint=0 if cod_distrito=="`aracatuba'"

*Rio Claro: not an endpoint:
replace endpoint=0 if cod_distrito=="`rio_claro'"

*store:
save "$workingdata/dist_data.dta", replace	

*******************************************
*PREPARE DISTANCE TO THE PREVIOUS DISTRICT
*******************************************

*re-key the data by district so that it can be matched on previous_dist,
*carrying the following districts and the distances to them:
keep cod_distrito dist_next_dist* district_next*
rename cod_distrito previous_dist
forvalues i = 1/3 {
	rename dist_next_dist`i' dist_aux_dist`i'
	rename district_next`i' district_aux`i'
}

*attach the district data by previous district, discarding rows that no district points to:
merge 1:m previous_dist using "$workingdata/dist_data.dta", keep(match using) nogenerate

*distance to the previous town: the distance the previous district records towards this one:
gen dist_previous = dist_aux_dist1 if district_aux1==cod_distrito
forvalues i = 2/3 {
	replace dist_previous = dist_aux_dist`i' if district_aux`i'==cod_distrito
}
label variable dist_previous "Distance to previous municipality"

*remove the auxiliary variables:
drop dist_aux_dist* district_aux*

*store:
compress
save "$workingdata/dist_data.dta", replace	

******************************************************
*ADD WHETHER DISTRICT WAS IN PREEXISTENT MUNICIPALITY
******************************************************

*generate mun code (the first seven characters of the district code):
gen cod_municipio=substr(cod_distrito,1,7)

*add the pre-existent municipality indicator; keep(master match) keeps every
*district but does not append municipalities that have no district in this
*file, which would otherwise enter the district data as rows without a
*district code:
merge m:1 cod_municipio using "$workingdata/mun_data.dta", keepusing(preexistent_mun) keep(master match) nogenerate

*****************
*LABEL VARIABLES
*****************

*railroad variables:
label variable year "Year of railroad arrival"
label variable year_last "Year of railroad arrival (last)"
label variable endpoint "Endpoint district, current"
label variable dist_to_luz "Distance to Luz, through rail"
label variable dist_to_luz_last "Distance to Luz (last station)"

*identifiers and company:
label variable cod_municipio "Municipality code, IBGE"
label variable cod_distrito "District code, IBGE"
label variable company_nr "Company, numeric"
label variable company_name "Company"

*neighbours along the line:
label variable previous_dist "Previous district"
forvalues i = 1/3 {
	label variable district_next`i' "Next district `i'"
	label variable dist_next_dist`i' "Distance to next district `i'"
}

*endpoint measures:
label variable time_as_endpoint "Years as endpoint"
label variable year_next "Year of arrival to next district"
label variable dist_next "Distance to next (first built) station"

*district attributes:
label variable dist_area "District area (sq. km)"
label variable pop "Population, 2010"
label variable pop_urban "Urban population, 2010"
label variable xcoord "Longitude"
label variable ycoord "Latitude"
label variable xcoord_rail "Longitude of first rail station"
label variable ycoord_rail "Latitude of first rail station"

*store the final district file and list its contents:
compress
save "$workingdata/dist_data.dta", replace	

describe


********************************************************************************
*5) PREPARE HISTORICAL CENSUSES
********************************************************************************

*******************
*PREPARE AMC CODES
*******************

import delimited "$data/IPEADATA_Municipios_X_AMCs.csv", delim(";") case(preserve) clear	

*keep the municipality identifiers and the AMC codes of each period, plus an empty state variable:
gen estado=""
keep UFMUNDV_CHAR UFMUN NOMEMUN NEW_CODE_1970_1997 NEW_CODE_1960_1997 NEW_CODE_1940_1997 NEW_CODE_1920_1997 NEW_CODE_1872_1997 estado

*short names: amc<first year of the period> and mun:
foreach y in 1970 1960 1940 1920 1872 {
	rename NEW_CODE_`y'_1997 amc`y'
}
rename NOMEMUN mun

*municipality names trimmed and in lower case:
replace mun = strlower(strtrim(mun))

*the state code is the first word of UFMUNDV_CHAR; translate it to the state abbreviation:
split UFMUNDV_CHAR
drop UFMUNDV_CHAR2
rename UFMUNDV_CHAR1 estado_code

replace estado="mg" if estado_code=="31"
replace estado="sp" if estado_code=="35"
replace estado="pr" if estado_code=="41"


*Altonia/PR is spelled differently in this file:
replace mun="altonia" if mun=="altania"

*Vila Alta is renamed to Alto Paraiso:
replace mun="alto paraiso" if mun=="vila alta"

*manual change: Mogi Guacu should be at the same AMC as Mogi Mirim (incorporated at 1877)
replace amc1872="35AMC1872_1997030" if amc1872=="35AMC1872_1997029"

*manual change: Pirapora should be at the same AMC as Curvelo (incorporated only later)
replace amc1872="31AMC1872_1997014" if amc1872=="31AMC1872_1997038"

*manual change: Porangaba should be at the same AMC as Tatui until 1920 (incorporated only later, in 1927)
replace amc1872="35AMC1872_1997051" if amc1872=="35AMC1872_1997018"
replace amc1920="35AMC2097100" if amc1920=="35AMC2097071"

*manual change: Lavrinhas should be the same AMC as Queluz in 1920 and 1940 (as they shared Pinheiros)
foreach y in 1920 1940 {
	replace amc`y'="354190" if amc`y'=="352660"
}

*only Paraná, São Paulo, and Minas Gerais are used:
keep if inlist(estado,"pr","sp","mg")

save "$workingdata/amc_historic.dta", replace

*load the municipality areas to compute the area of every AMC:
import delimited "$data/mun_area.csv", delim(";") case(preserve) stringcols(_all) clear	

	*restrict to the three states:
	keep if inlist(NM_UF_SIGLA,"SP","MG","PR")

	*build the merge key (the municipality code divided by 10 and rounded down) and attach the AMC codes:
	rename (CD_GCMUN AR_MUN_2016) (UFMUN area)
	keep UFMUN area
	keep if UFMUN!=""
	destring UFMUN, replace
	replace UFMUN=floor(UFMUN/10)
	merge 1:1 UFMUN using "$workingdata/amc_historic.dta", keep(match) nogenerate
	destring area, replace
	
	*area of each AMC, for every AMC definition: the sum over its municipalities
	foreach y of numlist 1872 1920 1940 1960 1970 {
		bysort amc`y': egen area`y'=total(area)
	}
	drop area
	
	*store:
	save "$workingdata/amc_historic.dta", replace
	
****************************************
*PREPARE VARIABLES FROM THE 1872 CENSUS
****************************************

import delimited "$data/censo1872_data.csv", delim(";") case(preserve) clear	

*one row per 1872 municipality (the rows of the first census variable), identifiers only:
keep if var_num==1
keep province province_name mun_code mun_name

	*state abbreviation from the province number, used as a merge key:
	gen estado = cond(province==19, "mg", cond(province==15, "sp", cond(province==16, "pr", "")))
	
	*municipality name trimmed and in lower case, used as a merge key:
	gen mun = mun_name
	replace mun = strlower(strtrim(mun))
	
	*change names for the merge: each line replaces a municipality name as it
	*appears in the 1872 census data with the name used in the AMC file, within
	*one state (mg, pr, sp); names not listed are left unchanged.

	*Minas Gerais:
	replace mun="estrela do sul" if mun=="bagagem" & estado=="mg"
	replace mun="bonfim" if mun=="bomfim" & estado=="mg"
	replace mun="conceicao do mato dentro" if mun=="conceicao" & estado=="mg"
	replace mun="cristina" if mun=="christina" & estado=="mg"
	replace mun="curvelo" if mun=="curvello" & estado=="mg"
	replace mun="boa esperanca" if mun=="dores da boa esperanca" & estado=="mg"
	replace mun="grao mogol" if mun=="graomogol" & estado=="mg"
	replace mun="camanducaia" if mun=="jaguari" & estado=="mg"
	replace mun="januaria" if mun=="januarias" & estado=="mg"
	replace mun="mar de espanha" if mun=="mar de hespanha" & estado=="mg"
	replace mun="abaete" if mun=="marmellada" & estado=="mg"
	replace mun="monte alegre de minas" if mun=="monte alegre" & estado=="mg"
	replace mun="muriae" if mun=="muriahe" & estado=="mg"
	replace mun="piui" if mun=="piumhi" & estado=="mg"
	replace mun="rio pomba" if mun=="pomba" & estado=="mg"
	replace mun="andrelandia" if mun=="porto turvo" & estado=="mg"
	replace mun="conselheiro lafaiete" if mun=="queluz" & estado=="mg"
	replace mun="rio pardo de minas" if mun=="rio pardo" & estado=="mg"
	replace mun="vicosa" if mun=="santa rita do turvo" & estado=="mg"
	replace mun="sacramento" if mun=="santissimo sacramento" & estado=="mg"
	replace mun="patos de minas" if mun=="santo antonio de patos" & estado=="mg"
	replace mun="aracuai" if mun=="santo antonio do arassuahi" & estado=="mg"
	replace mun="sao joao batista do gloria" if mun=="sao joao baptista" & estado=="mg"
	replace mun="sao joao del rei" if mun=="sao joao d`elrei" & estado=="mg"
	replace mun="tiradentes" if mun=="sao jose d`elrei" & estado=="mg"
	replace mun="itapecerica" if mun=="tamandua" & estado=="mg"
	replace mun="varzea da palma" if mun=="guaicuhi" & estado=="mg"
	replace mun="paraisopolis" if mun=="paraiso" & estado=="mg"

	*Paraná:
	replace mun="bocaiuva do sul" if mun=="arraial queimado" & estado=="pr"
	replace mun="lapa" if mun=="principe" & estado=="pr"
	replace mun="sao jose dos pinhais" if mun=="sao jose dos pinhaes" & estado=="pr"
	replace mun="rio branco do sul" if mun=="votuverava" & estado=="pr"
		*observation: Porto de Cima no longer exists (it became part of Morretes), so its AMC code is set by hand after the merge.
		
	*São Paulo:
	replace mun="apiai" if mun=="apiahi" & estado=="sp"	
	replace mun="areias" if mun=="areas" & estado=="sp"	
	replace mun="batatais" if mun=="batataes" & estado=="sp"	
	replace mun="itatiba" if mun=="belem (itatiba)" & estado=="sp"	
	replace mun="descalvado" if mun=="belem do descalvado" & estado=="sp"	
	replace mun="braganca paulista" if mun=="braganca" & estado=="sp"	
	replace mun="cabreuva" if mun=="cabriuva" & estado=="sp"	
	replace mun="aracoiaba da serra" if mun=="campo largo" & estado=="sp"
	replace mun="cananeia" if mun=="cananea" & estado=="sp"	
	replace mun="itanhaem" if mun=="conceicao de itanhaem" & estado=="sp"	
	replace mun="piracicaba" if mun=="constituicao" & estado=="sp"	
	replace mun="cotia" if mun=="cutia" & estado=="sp"	
	replace mun="itapeva" if mun=="itapeva da faxina" & estado=="sp"	
	replace mun="jacarei" if mun=="jacarehi" & estado=="sp"	
	replace mun="jau" if mun=="jahu" & estado=="sp"	
	replace mun="jundiai" if mun=="jundiahi" & estado=="sp"	
	replace mun="lencois paulista" if mun=="lencoes" & estado=="sp"	
	replace mun="moji das cruzes" if mun=="mogi das cruzes" & estado=="sp"	
	replace mun="mojimirim" if mun=="mogi mirim" & estado=="sp"	
	replace mun="monte mor" if mun=="montemor" & estado=="sp"	
	replace mun="natividade da serra" if mun=="natividade" & estado=="sp"	
	replace mun="nazare paulista" if mun=="nazareth" & estado=="sp"	
	replace mun="paraibuna" if mun=="parahibuna" & estado=="sp"	
	replace mun="santana de parnaiba" if mun=="paraiba" & estado=="sp"	
	replace mun="paranapanema" if mun=="paranapanma" & estado=="sp"	
	replace mun="araras" if mun=="patrocinio de araras" & estado=="sp"	
	replace mun="itapira" if mun=="penha de mogimirim" & estado=="sp"	
	replace mun="pindamonhangaba" if mun=="pindamonhagaba" & estado=="sp"
	replace mun="pirassununga" if mun=="pirassinunga" & estado=="sp"
	replace mun="itaporanga" if mun=="rio verde" & estado=="sp"
	replace mun="santa barbara doeste" if mun=="santa barbara" & estado=="sp"
	replace mun="santa isabel" if mun=="santa izabel" & estado=="sp"
	replace mun="piracaia" if mun=="santo antonio da cachoeira" & estado=="sp"
	replace mun="sao bento do sapucai" if mun=="sao bento de sapucahimirim" & estado=="sp"
	replace mun="sao carlos" if mun=="sao carlos do pinhal" & estado=="sp"
	replace mun="sao joao da boa vista" if mun=="sao joaobatista" & estado=="sp"
	replace mun="salesopolis" if mun=="sao jose do parahitinga" & estado=="sp"
	replace mun="sao jose do barreiro" if mun=="sao jose dos bareiros" & estado=="sp"
	replace mun="sao luis do paraitinga" if mun=="sao luis de parahitinga" & estado=="sp"
	replace mun="mococa" if mun=="sao sbastiao da boa vista" & estado=="sp"
	replace mun="sarapui" if mun=="serapuhi" & estado=="sp"
	replace mun="tiete" if mun=="toete" & estado=="sp"
	replace mun="ibiuna" if mun=="una" & estado=="sp"
	replace mun="ilhabela" if mun=="villa bella" & estado=="sp"
	replace mun="eldorado" if mun=="xiririca" & estado=="sp"
	replace mun="tatui" if mun=="tatuhi" & estado=="sp"
	*observation: Santo Amaro no longer exists (it became part of Sao Paulo), so its AMC code is set by hand after the merge.
	
	*match the 1872 municipalities to the AMC file by state and name; rows found only in the AMC file are not kept:
	merge 1:1 estado mun using "$workingdata/amc_historic.dta", keep(master match) nogenerate
	
	*AMC codes set by hand for the two municipalities that no longer exist:
	replace amc1872="41AMC1872_1997005" if mun=="porto de cima" & estado=="pr"
	replace amc1872="35AMC1872_1997046" if mun=="santo amaro" & estado=="sp"
	
	*only the census municipality code, the AMC code, and the AMC area are needed:
	keep mun_code amc1872 area1872
	
	*labels:
	label variable amc1872 "1872 AMC"
	label variable area1872 "1872 Municipality area"
	
	*store the crosswalk from 1872 municipalities to AMCs:
	save "$workingdata/census1872.dta", replace
	
*reload the full 1872 census file to build the census variables:
import delimited "$data/censo1872_data.csv", delim(";") case(preserve) clear	

*attach the AMC code and area of each 1872 municipality:
merge m:1 mun_code using "$workingdata/census1872.dta", nogenerate
sort mun_code var_num

*take each figure from the census rows (var_num) that hold it, so that it can be summed by AMC:

	*total and slave population:
	gen pop1872 = total if var_num==1
	gen slave1872 = slavetotal if var_num==1
	*foreign population:
	gen foreign1872 = total if var_num==12
	*literate and illiterate population:
	gen literate1872= total if var_num==13
	gen illiterate1872 = total if var_num==14
	*free children 6-15 in and out of school:
	gen school1872 = freetotal if var_num==15
	gen outschool1872 = freetotal if var_num==16
	*non-agricultural and agricultural occupations:
	gen nonag1872 = total if inrange(var_num,106,137)
	gen ag1872 = total if inlist(var_num,138,139)
	
*sum the figures by AMC; the area is the first non-missing value within the AMC:
collapse (rawsum) pop1872 slave1872 foreign1872 literate1872 illiterate1872 ///
	school1872 outschool1872 ag1872 nonag1872 ///
	(firstnm) area1872, by(amc1872)	

*derive the density and the shares:

	*population per unit of area:
	gen pop_density1872 = pop1872/area1872
	*slaves and foreigners over total population:
	gen sh_slave1872 = slave1872/pop1872
	gen sh_foreign1872 = foreign1872/pop1872
	*literate over literate plus illiterate:
	gen sh_literate1872=literate1872/(literate1872+illiterate1872)
	*in school over in school plus out of school:
	gen sh_school1872=school1872/(school1872+outschool1872)
	*non-agricultural over all counted occupations (excludes domestic labor):
	gen sh_nonag1872=nonag1872/(ag1872+nonag1872)

	*labels:
	label variable pop1872 "Population, 1872"
	label variable pop_density1872 "Population density, 1872"
	label variable sh_slave1872 "Share of slaves, 1872"
	label variable sh_foreign1872 "Share of foreigners, 1872"
	label variable sh_literate1872 "Literacy rate, 1872"
	label variable sh_school1872 "Share of 6-15 free children in school, 1872"
	label variable sh_nonag1872 "Share non-agricultural employment, 1872"
	
*keep the AMC code, the population, the density, and the shares:
keep pop1872 pop_density1872 sh_*1872 amc1872	

*store, replacing the crosswalk saved under the same name:
save "$workingdata/census1872.dta", replace	

***********************************************************
*PREPARE VARIABLE ON PREDICTED HISTORIC POPULATION DENSITY
***********************************************************

****************************************
*1872 AMC level: 1872, 1890, 1900, 1920*
****************************************

*load only the 1872 population density and the AMC code from the 1872 census file:
use pop_density1872 amc1872 using "$workingdata/census1872.dta", clear

	*store them as the starting file for the historic density series:
	save "$workingdata/density1872.dta", replace
	
*load the 1890 population file (semicolon-separated, variable-name case preserved):
import delimited "$data/pop1890.csv", delimiters(";") case(preserve) clear

	*Minas Gerais: rewrite the 1890 municipality names so that they match the
	*names used as merge key (rules are applied top to bottom):
	replace mun = "estrela do sul" if estado == "mg" & mun == "bagagem"
	replace mun = "bonfim" if estado == "mg" & mun == "bomfim"
	replace mun = "conceicao do mato dentro" if estado == "mg" & mun == "conceicao"
	replace mun = "cristina" if estado == "mg" & mun == "christina"
	replace mun = "curvelo" if estado == "mg" & mun == "curvello"
	replace mun = "mar de espanha" if estado == "mg" & mun == "mar de hespanha"
	replace mun = "monte alegre de minas" if estado == "mg" & mun == "monte alegre"
	replace mun = "muriae" if estado == "mg" & mun == "muriahe"
	replace mun = "rio pomba" if estado == "mg" & mun == "pomba"
	replace mun = "andrelandia" if estado == "mg" & mun == "porto do turvo"
	replace mun = "conselheiro lafaiete" if estado == "mg" & mun == "queluz"
	replace mun = "rio pardo de minas" if estado == "mg" & mun == "rio pardo"
	replace mun = "aracuai" if estado == "mg" & mun == "arassuahy"
	replace mun = "sao joao batista do gloria" if estado == "mg" & mun == "sao joao baptista"
	replace mun = "sao joao del rei" if estado == "mg" & mun == "sao joao delrey"
	replace mun = "araguari" if estado == "mg" & mun == "araguary"
	replace mun = "aiuruoca" if estado == "mg" & mun == "ayuruoca"
	replace mun = "baependi" if estado == "mg" & mun == "baependy"
	replace mun = "bambui" if estado == "mg" & mun == "bambuhy"
	replace mun = "bela vista de minas" if estado == "mg" & mun == "boa vista"
	replace mun = "bocaiuva" if estado == "mg" & mun == "bocayuva"
	replace mun = "caete" if estado == "mg" & mun == "caethe"
	replace mun = "cambui" if estado == "mg" & mun == "cambuhy"
	replace mun = "campo belo" if estado == "mg" & mun == "campo bello"
	replace mun = "andradas" if estado == "mg" & mun == "caracol"
	replace mun = "monte carmelo" if estado == "mg" & mun == "carmo da bagagem"
	replace mun = "carmo do paranaiba" if estado == "mg" & mun == "carmo do paranahyba"
	replace mun = "cataguases" if estado == "mg" & mun == "cataguazes"
	replace mun = "dores do indaia" if estado == "mg" & mun == "dores do indaya"
	replace mun = "entre rios de minas" if estado == "mg" & mun == "entre rios"
	replace mun = "frutal" if estado == "mg" & mun == "fructal"
	replace mun = "jacui" if estado == "mg" & mun == "jacuhy"
	replace mun = "camanducaia" if estado == "mg" & mun == "jaguary"
	replace mun = "manhuacu" if estado == "mg" & mun == "manhuassu"
	replace mun = "muzambinho" if estado == "mg" & mun == "musambinho"
	replace mun = "santos dumont" if estado == "mg" & mun == "palmyra"
	replace mun = "para de minas" if estado == "mg" & mun == "para"
	replace mun = "passa quatro" if estado == "mg" & mun == "passaquatro"
	replace mun = "pedralva" if estado == "mg" & mun == "pedra branca"
	replace mun = "pitangui" if estado == "mg" & mun == "pitanguy"
	replace mun = "piui" if estado == "mg" & mun == "piumhy"
	replace mun = "cassia" if estado == "mg" & mun == "santa rita de cassia"
	replace mun = "santa rita do sapucai" if estado == "mg" & mun == "santa rita do sapucahy"
	replace mun = "ferros" if estado == "mg" & mun == "santanna dos ferros"
	replace mun = "salinas" if estado == "mg" & mun == "santo antonio das salinas"
	replace mun = "gouvea" if estado == "mg" & mun == "santo antonio de gouvea"
	replace mun = "machado" if estado == "mg" & mun == "santo antonio do machado"
	replace mun = "pecanha" if estado == "mg" & mun == "santo antonio do pecanha"
	replace mun = "patos de minas" if estado == "mg" & mun == "santo antonio dos patos"
	replace mun = "sao goncalo do sapucai" if estado == "mg" & mun == "sao goncalo do sapucahy"
	replace mun = "caratinga" if estado == "mg" & mun == "sao joao do caratinga"
	replace mun = "alem paraiba" if estado == "mg" & mun == "sao jose dalem parahyba"
	replace mun = "paraisopolis" if estado == "mg" & mun == "sao jose do paraiso"
	replace mun = "guanhaes" if estado == "mg" & mun == "sao miguel de guanhaes"
	replace mun = "teofilo otoni" if estado == "mg" & mun == "theophilo ottoni"
	replace mun = "tres coracoes" if estado == "mg" & mun == "tres coracoes do rio verde"

	*Parana: rewrite the 1890 municipality names so that they match the names
	*used as merge key (each source name is mapped exactly once):
	replace mun="sao jose dos pinhais" if mun=="sao jose dos pinhaes" & estado=="pr"
	replace mun="rio branco do sul" if mun=="votuverava" & estado=="pr"
	replace mun="cerro azul" if mun=="serro azul" & estado=="pr"
	replace mun="bocaiuva do sul" if mun=="bocayuva" & estado=="pr"
	replace mun="curitiba" if mun=="corityba" & estado=="pr"
	replace mun="piraquara" if mun=="deodoro" & estado=="pr"
	replace mun="entre rios do oeste" if mun=="entre rios" & estado=="pr"
	replace mun="campina grande do sul" if mun=="glycerio" & estado=="pr"
	replace mun="guaraquecaba" if mun=="guarakessava" & estado=="pr"
	replace mun="jaguariaiva" if mun=="jaguaryahiva" & estado=="pr"
	replace mun="morretes" if mun=="morrtes" & estado=="pr"
	replace mun="pirai do sul" if mun=="pirahy" & estado=="pr"
	replace mun="sao joao do triunfo" if mun=="sao joao do triumpho" & estado=="pr"
	replace mun="tomazina" if mun=="thomazina" & estado=="pr"
	replace mun="almirante tamandare" if mun=="tamandare" & estado=="pr"
	replace mun="tibagi" if mun=="tibagy" & estado=="pr"
	replace mun="uniao da vitoria" if mun=="uniao da victoria" & estado=="pr"
	*the following extinct municipalities are not renamed here; their AMC code
	*is set by hand after the merge:
	*  - Porto de Cima (later part of Morretes)
	*  - Conchas (later part of Ponta Grossa)
	*  - Assunguy de Cima (later part of Cerro Azul)
		
	*Sao Paulo: rewrite the 1890 municipality names so that they match the
	*names used as merge key (rules are applied top to bottom):
	replace mun = "cachoeira paulista" if estado == "sp" & mun == "bocaina"
	replace mun = "apiai" if estado == "sp" & mun == "apiahy"
	replace mun = "batatais" if estado == "sp" & mun == "batataes"
	replace mun = "descalvado" if estado == "sp" & mun == "belem do descalvado"
	replace mun = "braganca paulista" if estado == "sp" & mun == "braganca"
	replace mun = "cananeia" if estado == "sp" & mun == "cananea"
	replace mun = "cotia" if estado == "sp" & mun == "cutia"
	replace mun = "itapeva" if estado == "sp" & mun == "itapeva da faxina"
	replace mun = "jacarei" if estado == "sp" & mun == "jacarehy"
	replace mun = "jau" if estado == "sp" & mun == "jahu"
	replace mun = "jundiai" if estado == "sp" & mun == "jundiahy"
	replace mun = "lencois paulista" if estado == "sp" & mun == "lencoes"
	replace mun = "moji das cruzes" if estado == "sp" & mun == "mogy das cruzes"
	replace mun = "mojimirim" if estado == "sp" & mun == "mogymirim"
	replace mun = "monte mor" if estado == "sp" & mun == "montemor"
	replace mun = "natividade da serra" if estado == "sp" & mun == "natividade"
	replace mun = "nazare paulista" if estado == "sp" & mun == "nazareth"
	replace mun = "paraibuna" if estado == "sp" & mun == "parahybuna"
	replace mun = "santana de parnaiba" if estado == "sp" & mun == "parnahyba"
	replace mun = "santa barbara doeste" if estado == "sp" & mun == "santa barbara"
	replace mun = "piracaia" if estado == "sp" & mun == "santo antonio da cachoeira"
	replace mun = "sao bento do sapucai" if estado == "sp" & mun == "sao bento do sapucahy"
	replace mun = "sao carlos" if estado == "sp" & mun == "sao carlos do pinhal"
	replace mun = "sao joao da boa vista" if estado == "sp" & mun == "sao joao baptista da boa vista"
	replace mun = "salesopolis" if estado == "sp" & mun == "sao jose do parahytinga"
	replace mun = "sao luis do paraitinga" if estado == "sp" & mun == "sao luiz do parahytinga"
	replace mun = "sarapui" if estado == "sp" & mun == "sarapuhy"
	replace mun = "ibiuna" if estado == "sp" & mun == "una"
	replace mun = "ilhabela" if estado == "sp" & mun == "villa bella"
	replace mun = "eldorado" if estado == "sp" & mun == "xiririca"
	replace mun = "tatui" if estado == "sp" & mun == "tatuhy"
	replace mun = "bariri" if estado == "sp" & mun == "barery"
	replace mun = "bom sucesso de itarare" if estado == "sp" & mun == "bom sucesso"
	replace mun = "monteiro lobato" if estado == "sp" & mun == "buquira"
	replace mun = "aracoiaba da serra" if estado == "sp" & mun == "campo largo de sorocaba"
	replace mun = "campos novos paulista" if estado == "sp" & mun == "campos novos do paranapanema"
	replace mun = "capivari" if estado == "sp" & mun == "capivary"
	replace mun = "ituverava" if estado == "sp" & mun == "carmo da franca"
	replace mun = "guarulhos" if estado == "sp" & mun == "conceicao dos guarulhos"
	replace mun = "angatuba" if estado == "sp" & mun == "espirito santo da boa vista"
	replace mun = "bauru" if estado == "sp" & mun == "espirito santo da fortaleza"
	replace mun = "nuporanga" if estado == "sp" & mun == "espirito santo de batataes"
	replace mun = "barretos" if estado == "sp" & mun == "espirito santo dos barretos"
	replace mun = "guarei" if estado == "sp" & mun == "guarehy"
	replace mun = "itapecerica da serra" if estado == "sp" & mun == "itapecerica"
	replace mun = "itapira" if estado == "sp" & mun == "itapyra"
	replace mun = "mairipora" if estado == "sp" & mun == "juquery"
	replace mun = "lavrinhas" if estado == "sp" & mun == "lavrinha"
	replace mun = "mogi guacu" if estado == "sp" & mun == "mogyguassu"
	replace mun = "patrocinio paulista" if estado == "sp" & mun == "patrocinio de sapucahy"
	replace mun = "igarata" if estado == "sp" & mun == "patrocinio"
	replace mun = "redencao da serra" if estado == "sp" & mun == "redempcao"
	replace mun = "bofete" if estado == "sp" & mun == "rio bonito"
	replace mun = "avare" if estado == "sp" & mun == "rio novo"
	replace mun = "salto" if estado == "sp" & mun == "salto do itu"
	replace mun = "aguas de santa barbara" if estado == "sp" & mun == "santa barbara do rio pardo"
	replace mun = "igarapava" if estado == "sp" & mun == "santa rita do paraiso"
	replace mun = "santa rita do passa quatro" if estado == "sp" & mun == "santa rita do passaquatro"
	replace mun = "sao bernardo do campo" if estado == "sp" & mun == "sao bernardo"
	replace mun = "itaporanga" if estado == "sp" & mun == "sao joao baptista do rio verde"
	replace mun = "sao manuel" if estado == "sp" & mun == "sao manoel do paraiso"
	replace mun = "piraju" if estado == "sp" & mun == "sao sebastiao do tijuco preto"
	replace mun = "socorro" if estado == "sp" & mun == "soccorro"
	replace mun = "iporanga" if estado == "sp" & mun == "yporanga"
	*the following extinct municipalities are not renamed here; their AMC code
	*is set by hand after the merge:
	*  - Santo Amaro (later part of Sao Paulo)
	*  - sao francisco de paula dos pinheiros (later part of Lavrinhas)
	*  - Jatai (later part of Cachoeira Paulista)
	
	*attach the 1872 AMC code to every 1890 municipality; rows that exist only
	*in the crosswalk are not kept, and no _merge variable is created:
	merge 1:1 estado mun using "$workingdata/amc_historic.dta", keep(master match) nogenerate
	
	*extinct municipalities: assign the AMC code by hand
	replace amc1872="41AMC1872_1997005" if mun=="porto de cima" & estado=="pr"
	replace amc1872="41AMC1872_1997002" if mun=="conchas" & estado=="pr"
	replace amc1872="41AMC1872_1997001" if mun=="assunguy de cima" & estado=="pr"
	replace amc1872="35AMC1872_1997046" if mun=="santo amaro" & estado=="sp"
	*NOTE(review): the next rule only fires on the exact string "sao francisco de paula",
	*while the note before the merge names "sao francisco de paula dos pinheiros";
	*confirm the spelling used in pop1890.csv
	replace amc1872="35AMC1872_1997035" if mun=="sao francisco de paula" & estado=="sp"
	replace amc1872="35AMC1872_1997026" if mun=="jatahy" & estado=="sp"
	
	*municipalities that still have no AMC code are pooled under amc1872==""
	*by the collapse and removed afterwards; show how many there are:
	count if amc1872==""
	drop mun
	
	*add up population within each 1872 AMC (area1872: first non-missing value):
	collapse (rawsum) pop1890 (firstnm) area1872, by(amc1872)	
	
	*population density in 1890 on the 1872 AMC area:
	gen pop_density1890=pop1890/area1872
	keep amc1872 pop_density1890
	drop if amc1872==""
	
	*append the 1890 density to the AMC-level density file and save it:
	merge 1:1 amc1872 using "$workingdata/density1872.dta", nogenerate
	save "$workingdata/density1872.dta", replace
	
	
*load the 1900 population file (semicolon-separated, variable-name case preserved):
import delimited "$data/pop1900.csv", delimiters(";") case(preserve) clear

	*Minas Gerais: rewrite the 1900 municipality names so that they match the
	*names used as merge key (rules are applied top to bottom):
	replace mun = "estrela do sul" if estado == "mg" & mun == "bagagem"
	replace mun = "bonfim" if estado == "mg" & mun == "bomfim"
	replace mun = "cristina" if estado == "mg" & mun == "christina"
	replace mun = "curvelo" if estado == "mg" & mun == "curvello"
	replace mun = "mar de espanha" if estado == "mg" & mun == "mar de hespanha"
	replace mun = "monte alegre de minas" if estado == "mg" & mun == "monte alegre"
	replace mun = "rio pomba" if estado == "mg" & mun == "pomba"
	replace mun = "conselheiro lafaiete" if estado == "mg" & mun == "queluz"
	replace mun = "rio pardo de minas" if estado == "mg" & mun == "rio pardo"
	replace mun = "aracuai" if estado == "mg" & mun == "arassuahy"
	replace mun = "sao joao batista do gloria" if estado == "mg" & mun == "sao joao baptista"
	replace mun = "sao joao del rei" if estado == "mg" & mun == "sao joao delrey"
	replace mun = "araguari" if estado == "mg" & mun == "araguary"
	replace mun = "aiuruoca" if estado == "mg" & mun == "ayuruoca"
	replace mun = "baependi" if estado == "mg" & mun == "baependy"
	replace mun = "bambui" if estado == "mg" & mun == "bambuhy"
	replace mun = "bocaiuva" if estado == "mg" & mun == "bocayuva"
	replace mun = "caete" if estado == "mg" & mun == "caethe"
	replace mun = "cambui" if estado == "mg" & mun == "cambuhy"
	replace mun = "campo belo" if estado == "mg" & mun == "campo bello"
	replace mun = "andradas" if estado == "mg" & mun == "caracol"
	replace mun = "monte carmelo" if estado == "mg" & mun == "monte carmello"
	replace mun = "carmo do paranaiba" if estado == "mg" & mun == "carmo do paranahyba"
	replace mun = "cataguases" if estado == "mg" & mun == "cataguazes"
	replace mun = "dores do indaia" if estado == "mg" & mun == "dores do indaya"
	replace mun = "entre rios de minas" if estado == "mg" & mun == "entre rios"
	replace mun = "frutal" if estado == "mg" & mun == "fructal"
	replace mun = "jacui" if estado == "mg" & mun == "jacuhy"
	replace mun = "camanducaia" if estado == "mg" & mun == "jaguary"
	replace mun = "manhuacu" if estado == "mg" & mun == "manhuassu"
	replace mun = "santos dumont" if estado == "mg" & mun == "palmyra"
	replace mun = "para de minas" if estado == "mg" & mun == "para"
	replace mun = "pedralva" if estado == "mg" & mun == "pedra branca"
	replace mun = "pitangui" if estado == "mg" & mun == "pitanguy"
	replace mun = "piui" if estado == "mg" & mun == "piumhy"
	replace mun = "cassia" if estado == "mg" & mun == "santa rita de cassia"
	replace mun = "santa rita do sapucai" if estado == "mg" & mun == "santa rita do sapucahy"
	replace mun = "salinas" if estado == "mg" & mun == "santo antonio das salinas"
	replace mun = "gouvea" if estado == "mg" & mun == "santo antonio de gouvea"
	replace mun = "machado" if estado == "mg" & mun == "santo antonio do machado"
	replace mun = "patos de minas" if estado == "mg" & mun == "santo antonio dos patos"
	replace mun = "sao goncalo do sapucai" if estado == "mg" & mun == "sao goncalo do sapucahy"
	replace mun = "alem paraiba" if estado == "mg" & mun == "alem parahyba"
	replace mun = "paraisopolis" if estado == "mg" & mun == "sao jose do paraiso"
	replace mun = "teofilo otoni" if estado == "mg" & mun == "theophilo ottoni"
	replace mun = "tres coracoes" if estado == "mg" & mun == "tres coracoes do rio verde"
	replace mun = "belo horizonte" if estado == "mg" & mun == "bello horizonte"
	replace mun = "monte azul" if estado == "mg" & mun == "boa vista do tremedal"
	replace mun = "conceicao do mato dentro" if estado == "mg" & mun == "conceicao do serro"
	replace mun = "conceicao do rio verde" if estado == "mg" & mun == "contendas"
	replace mun = "boa esperanca" if estado == "mg" & mun == "dores da boa esperanca"
	replace mun = "itabira" if estado == "mg" & mun == "itabira do matto dentro"
	replace mun = "monte santo de minas" if estado == "mg" & mun == "monte santo"
	replace mun = "nova lima" if estado == "mg" & mun == "nova de lima"
	replace mun = "patos de minas" if estado == "mg" & mun == "patos"
	replace mun = "piranga" if estado == "mg" & mun == "pyranga"
	replace mun = "visconde do rio branco" if estado == "mg" & mun == "rio branco"
	replace mun = "santa luzia" if estado == "mg" & mun == "santa luzia do rio das velhas"
	replace mun = "paraisopolis" if estado == "mg" & mun == "sao jose do paraizo"
	replace mun = "eugenopolis" if estado == "mg" & mun == "sao manoel"
	replace mun = "muriae" if estado == "mg" & mun == "sao paulo do muriahe"
	replace mun = "uberlandia" if estado == "mg" & mun == "sao pedro de uberabinha"
	replace mun = "sao sebastiao do paraiso" if estado == "mg" & mun == "sao sebastiao do paraizo"
	replace mun = "andrelandia" if estado == "mg" & mun == "turvo"

	*Parana: rewrite the 1900 municipality names so that they match the names
	*used as merge key (rules are applied top to bottom):
	replace mun = "sao jose dos pinhais" if estado == "pr" & mun == "sao jose dos pinhaes"
	replace mun = "rio branco do sul" if estado == "pr" & mun == "votuverava"
	replace mun = "cerro azul" if estado == "pr" & mun == "serro azul"
	replace mun = "bocaiuva do sul" if estado == "pr" & mun == "bocayuva"
	replace mun = "curitiba" if estado == "pr" & mun == "curityba"
	replace mun = "piraquara" if estado == "pr" & mun == "deodoro"
	replace mun = "entre rios do oeste" if estado == "pr" & mun == "entre rios"
	replace mun = "jaguariaiva" if estado == "pr" & mun == "jaguaryahiva"
	replace mun = "pirai do sul" if estado == "pr" & mun == "pirahy"
	replace mun = "sao joao do triunfo" if estado == "pr" & mun == "sao joao do triumpho"
	replace mun = "tomazina" if estado == "pr" & mun == "thomazina"
	replace mun = "almirante tamandare" if estado == "pr" & mun == "tamandare"
	replace mun = "tibagi" if estado == "pr" & mun == "tibagy"
	replace mun = "uniao da vitoria" if estado == "pr" & mun == "uniao da victoria"
	replace mun = "clevelandia" if estado == "pr" & mun == "bella vista de palmas"
	replace mun = "campina grande do sul" if estado == "pr" & mun == "campina grande"
	replace mun = "ribeirao claro" if estado == "pr" & mun == "espirito santo do itarare"
	replace mun = "guaraquecaba" if estado == "pr" & mun == "guarakesaba"
	replace mun = "jacarezinho" if estado == "pr" & mun == "nova alcantara"
	replace mun = "palmas" if estado == "pr" & mun == "palma"
	replace mun = "capanema" if estado == "pr" & mun == "sao joao de capanema"
	replace mun = "ipiranga" if estado == "pr" & mun == "ypiranga"
	*the following extinct municipalities are not renamed here; their AMC code
	*is set by hand after the merge:
	*  - Porto de Cima (later part of Morretes)
	*  - Conchas (later part of Ponta Grossa)
	*  - Assunguy de Cima (later part of Cerro Azul)
		
	*Sao Paulo: rewrite the 1900 municipality names so that they match the
	*names used as merge key. Rules are applied top to bottom and the order
	*matters: the old "bocaina" is renamed first, because a later rule turns
	*"sao joao da bocaina" into a new "bocaina".
	replace mun = "cachoeira paulista" if estado == "sp" & mun == "bocaina"
	replace mun = "apiai" if estado == "sp" & mun == "apiahy"
	replace mun = "batatais" if estado == "sp" & mun == "batataes"
	replace mun = "descalvado" if estado == "sp" & mun == "belem do descalvado"
	replace mun = "braganca paulista" if estado == "sp" & mun == "braganca"
	replace mun = "cotia" if estado == "sp" & mun == "cutia"
	replace mun = "jacarei" if estado == "sp" & mun == "jacarehy"
	replace mun = "jau" if estado == "sp" & mun == "jahu"
	replace mun = "jundiai" if estado == "sp" & mun == "jundiahy"
	replace mun = "lencois paulista" if estado == "sp" & mun == "lencoes"
	replace mun = "moji das cruzes" if estado == "sp" & mun == "mogy das cruzes"
	replace mun = "mojimirim" if estado == "sp" & mun == "mogymirim"
	replace mun = "natividade da serra" if estado == "sp" & mun == "natividade"
	replace mun = "nazare paulista" if estado == "sp" & mun == "nazareth"
	replace mun = "paraibuna" if estado == "sp" & mun == "parahybuna"
	replace mun = "santana de parnaiba" if estado == "sp" & mun == "parnahyba"
	replace mun = "santa barbara doeste" if estado == "sp" & mun == "santa barbara"
	replace mun = "piracaia" if estado == "sp" & mun == "santo antonio da cachoeira"
	replace mun = "sao bento do sapucai" if estado == "sp" & mun == "sao bento do sapucahy"
	replace mun = "sao carlos" if estado == "sp" & mun == "sao carlos do pinhal"
	replace mun = "salesopolis" if estado == "sp" & mun == "sao jose do parahytinga"
	replace mun = "sao luis do paraitinga" if estado == "sp" & mun == "sao luiz do parahytinga"
	replace mun = "sarapui" if estado == "sp" & mun == "sarapuhy"
	replace mun = "ibiuna" if estado == "sp" & mun == "una"
	replace mun = "ilhabela" if estado == "sp" & mun == "villa bella"
	replace mun = "eldorado" if estado == "sp" & mun == "xiririca"
	replace mun = "tatui" if estado == "sp" & mun == "tatuhy"
	replace mun = "bariri" if estado == "sp" & mun == "bariry"
	replace mun = "bom sucesso de itarare" if estado == "sp" & mun == "bom sucesso"
	replace mun = "monteiro lobato" if estado == "sp" & mun == "buquira"
	replace mun = "aracoiaba da serra" if estado == "sp" & mun == "campo largo de sorocaba"
	replace mun = "campos novos paulista" if estado == "sp" & mun == "campos novos do paranapanema"
	replace mun = "capivari" if estado == "sp" & mun == "capivary"
	replace mun = "angatuba" if estado == "sp" & mun == "espirito santo da boa vista"
	replace mun = "barretos" if estado == "sp" & mun == "espirito santo dos barretos"
	replace mun = "guarei" if estado == "sp" & mun == "guarehy"
	replace mun = "itapecerica da serra" if estado == "sp" & mun == "itapecerica"
	replace mun = "mairipora" if estado == "sp" & mun == "juquery"
	replace mun = "mogi guacu" if estado == "sp" & mun == "mogyguassu"
	replace mun = "patrocinio paulista" if estado == "sp" & mun == "patrocinio do sapucahy"
	replace mun = "redencao da serra" if estado == "sp" & mun == "redempcao"
	replace mun = "bofete" if estado == "sp" & mun == "rio bonito"
	replace mun = "salto" if estado == "sp" & mun == "salto de ytu"
	replace mun = "aguas de santa barbara" if estado == "sp" & mun == "santa barbara do rio pardo"
	replace mun = "igarapava" if estado == "sp" & mun == "santa rita do paraiso"
	replace mun = "sao bernardo do campo" if estado == "sp" & mun == "sao bernardo"
	replace mun = "sao manuel" if estado == "sp" & mun == "sao manoel"
	replace mun = "socorro" if estado == "sp" & mun == "soccorro"
	replace mun = "iporanga" if estado == "sp" & mun == "yporanga"
	replace mun = "analandia" if estado == "sp" & mun == "annapoles"
	replace mun = "areias" if estado == "sp" & mun == "areas"
	replace mun = "itapolis" if estado == "sp" & mun == "boa vista das pedras"
	replace mun = "capao bonito" if estado == "sp" & mun == "capao bonito do paranapanema"
	replace mun = "itanhaem" if estado == "sp" & mun == "conceicao de itanhaem"
	replace mun = "monte alegre do sul" if estado == "sp" & mun == "conceicao de monte alegre"
	replace mun = "joanopolis" if estado == "sp" & mun == "curralinho"
	replace mun = "itapeva" if estado == "sp" & mun == "faxina"
	replace mun = "jardinopolis" if estado == "sp" & mun == "jardinopoles"
	replace mun = "mineiros do tiete" if estado == "sp" & mun == "mineiros"
	replace mun = "igarata" if estado == "sp" & mun == "patrocinio de santa izabel"
	replace mun = "pedreira" if estado == "sp" & mun == "pedreiras"
	replace mun = "pilar do sul" if estado == "sp" & mun == "pilar"
	replace mun = "piraju" if estado == "sp" & mun == "piraja"
	replace mun = "pirapora do bom jesus" if estado == "sp" & mun == "pirapora"
	replace mun = "anhembi" if estado == "sp" & mun == "remedios da ponte do tiete"
	replace mun = "taquaritinga" if estado == "sp" & mun == "ribeiraosinho"
	replace mun = "sao jose do rio preto" if estado == "sp" & mun == "rio preto"
	replace mun = "itai" if estado == "sp" & mun == "santo antonio da boa vista"
	replace mun = "bocaina" if estado == "sp" & mun == "sao joao da bocaina"
	replace mun = "sao jose do barreiro" if estado == "sp" & mun == "sao jose dos barreiros"
	replace mun = "agudos" if estado == "sp" & mun == "sao paulo dos agudos"
	replace mun = "sertaozinho" if estado == "sp" & mun == "sertaosinho"
	replace mun = "tambau" if estado == "sp" & mun == "tambahu"
	replace mun = "piquete" if estado == "sp" & mun == "villa vieira do piquete"
	replace mun = "itu" if estado == "sp" & mun == "ytu"
	replace mun = "boa esperanca do sul" if estado == "sp" & mun == "boa esperanca"
	*the following extinct municipalities are not renamed here; their AMC code
	*is set by hand after the merge:
	*  - Santo Amaro (later part of Sao Paulo)
	*  - Pinheiros (later part of Queluz)
	*  - Jatai (later part of Cachoeira Paulista)
	
	*attach the 1872 AMC code to every 1900 municipality; rows that exist only
	*in the crosswalk are not kept, and no _merge variable is created:
	merge 1:1 estado mun using "$workingdata/amc_historic.dta", keep(master match) nogenerate
	
	*extinct municipalities: assign the AMC code by hand
	replace amc1872="41AMC1872_1997005" if mun=="porto de cima" & estado=="pr"
	replace amc1872="41AMC1872_1997002" if mun=="conchas" & estado=="pr"
	replace amc1872="41AMC1872_1997001" if mun=="assunguy de cima" & estado=="pr"
	replace amc1872="35AMC1872_1997046" if mun=="santo amaro" & estado=="sp"
	replace amc1872="35AMC1872_1997035" if mun=="pinheiros" & estado=="sp"
	replace amc1872="35AMC1872_1997026" if mun=="jatahy" & estado=="sp"
	
	*municipalities that still have no AMC code are pooled under amc1872==""
	*by the collapse and removed afterwards; show how many there are:
	count if amc1872==""
	drop mun
	
	*add up population within each 1872 AMC (area1872: first non-missing value):
	collapse (rawsum) pop1900 (firstnm) area1872, by(amc1872)	
	
	*population density in 1900 on the 1872 AMC area:
	gen pop_density1900=pop1900/area1872
	keep amc1872 pop_density1900
	drop if amc1872==""
	
	*append the 1900 density to the AMC-level density file and save it:
	merge 1:1 amc1872 using "$workingdata/density1872.dta", nogenerate
	save "$workingdata/density1872.dta", replace
	
*import 1920 data:
import delim "$data/pop1920.csv", delim(";") case(preserve) clear	

	*change names for the merge:
	replace mun="bonfim" if mun=="bomfim" & estado=="mg"
	replace mun="cristina" if mun=="christina" & estado=="mg"
	replace mun="curvelo" if mun=="curvello" & estado=="mg"
	replace mun="mar de espanha" if mun=="mar de hespanha" & estado=="mg"
	replace mun="monte alegre de minas" if mun=="monte alegre" & estado=="mg"
	replace mun="rio pomba" if mun=="pomba" & estado=="mg"
	replace mun="conselheiro lafaiete" if mun=="queluz" & estado=="mg"
	replace mun="rio pardo de minas" if mun=="rio pardo" & estado=="mg"
	replace mun="aracuai" if mun=="arassuahy" & estado=="mg"
	replace mun="sao joao batista do gloria" if mun=="sao joao baptista" & estado=="mg"
	replace mun="sao joao del rei" if mun=="sao joao delrey" & estado=="mg"
	replace mun="araguari" if mun=="araguary" & estado=="mg"
	replace mun="aiuruoca" if mun=="ayuruoca" & estado=="mg"
	replace mun="baependi" if mun=="baependy" & estado=="mg"
	replace mun="bambui" if mun=="bambuhy" & estado=="mg"
	replace mun="bocaiuva" if mun=="bocayuva" & estado=="mg"
	replace mun="caete" if mun=="caethe" & estado=="mg"
	replace mun="cambui" if mun=="cambuhy" & estado=="mg"
	replace mun="campo belo" if mun=="campo bello" & estado=="mg"
	replace mun="andradas" if mun=="caracol" & estado=="mg"
	replace mun="monte carmelo" if mun=="monte carmello" & estado=="mg"
	replace mun="carmo do paranaiba" if mun=="carmo do paranahyba" & estado=="mg"
	replace mun="cataguases" if mun=="cataguazes" & estado=="mg"
	replace mun="dores do indaia" if mun=="dores do indaya" & estado=="mg"
	replace mun="entre rios de minas" if mun=="entre rios" & estado=="mg"
	replace mun="frutal" if mun=="fructal" & estado=="mg"
	replace mun="jacui" if mun=="jacuhy" & estado=="mg"
	replace mun="camanducaia" if mun=="jaguary" & estado=="mg"
	replace mun="manhuacu" if mun=="manhuassu" & estado=="mg"
	replace mun="santos dumont" if mun=="palmyra" & estado=="mg"
	replace mun="para de minas" if mun=="para" & estado=="mg"
	replace mun="pedralva" if mun=="pedra branca" & estado=="mg"
	replace mun="pitangui" if mun=="pitanguy" & estado=="mg"
	replace mun="piui" if mun=="piumhy" & estado=="mg"
	replace mun="santa rita do sapucai" if mun=="santa rita do sapucahy" & estado=="mg"
	replace mun="machado" if mun=="santo antonio do machado" & estado=="mg"
	replace mun="sao goncalo do sapucai" if mun=="sao goncalo do sapucahy" & estado=="mg"
	replace mun="alem paraiba" if mun=="alem parahyba" & estado=="mg"
	replace mun="teofilo otoni" if mun=="theophilo ottoni" & estado=="mg"
	replace mun="tres coracoes" if mun=="tres coracoes do rio verde" & estado=="mg"
	replace mun="belo horizonte" if mun=="bello horizonte" & estado=="mg"
	replace mun="monte azul" if mun=="boa vista do tremedal" & estado=="mg"
	replace mun="conceicao do mato dentro" if mun=="conceicao do serro" & estado=="mg"
	replace mun="boa esperanca" if mun=="dores da boa esperanca" & estado=="mg"
	replace mun="monte santo de minas" if mun=="monte santo" & estado=="mg"
	replace mun="patos de minas" if mun=="patos" & estado=="mg"
	replace mun="visconde do rio branco" if mun=="rio branco" & estado=="mg"
	replace mun="santa luzia" if mun=="santa luzia do rio das velhas" & estado=="mg"
	replace mun="muriae" if mun=="sao paulo do muriahe" & estado=="mg"
	replace mun="andrelandia" if mun=="turvo" & estado=="mg"
	replace mun="tupaciguara" if mun=="abadia do bom sucesso" & estado=="mg"
	replace mun="lambari" if mun=="aguas virtuosas" & estado=="mg"
	replace mun="aimores" if mun=="aymores" & estado=="mg"
	replace mun="campos gerais" if mun=="campos geraes" & estado=="mg"
	replace mun="capelinha" if mun=="capellinha" & estado=="mg"
	replace mun="eloi mendes" if mun=="eloy mendes" & estado=="mg"
	replace mun="estrela do sul" if mun=="estrella do sul" & estado=="mg"
	replace mun="fortaleza de minas" if mun=="fortaleza" & estado=="mg"
	replace mun="guarani" if mun=="guarany" & estado=="mg"
	replace mun="coracao de jesus" if mun=="inconfidencia" & estado=="mg"
	replace mun="ituiutaba" if mun=="ituyutaba" & estado=="mg"
	replace mun="mariana" if mun=="marianna" & estado=="mg"
	replace mun="paraguacu" if mun=="paraguassu" & estado=="mg"
	replace mun="pequi" if mun=="pequy" & estado=="mg"
	replace mun="ipanema" if mun=="rio jose pedro" & estado=="mg"
	replace mun="esmeraldas" if mun=="santa quiteria" & estado=="mg"
	replace mun="ferros" if mun=="santanna dos ferros" & estado=="mg"
	replace mun="sao gotardo" if mun=="sao gothardo" & estado=="mg"
	replace mun="botelhos" if mun=="sao jose dos botelhos" & estado=="mg"
	replace mun="eugenopolis" if mun=="sao manuel" & estado=="mg"
	replace mun="mutum" if mun=="sao manuel do mutum" & estado=="mg"
	replace mun="guanhaes" if mun=="sao miguel de guanhaes" & estado=="mg"
	replace mun="carmo de minas" if mun=="silvestre ferraz" & estado=="mg"
	replace mun="uberlandia" if mun=="uberabinha" & estado=="mg"
	replace mun="brasopolis" if mun=="villa braz" & estado=="mg"
	replace mun="brasilia de minas" if mun=="villa brazilia" & estado=="mg"
	replace mun="cambuquira" if mun=="villa de cambuqueira" & estado=="mg"
	replace mun="jequitinhonha" if mun=="villa jequitinhona" & estado=="mg"
	replace mun="nova lima" if mun=="villa nova de lima" & estado=="mg"
	replace mun="nova resende" if mun=="villa nova de rezende" & estado=="mg"
	replace mun="resende costa" if mun=="villa rezende costa" & estado=="mg"
	replace mun="rio espera" if mun=="villa rio espera" & estado=="mg"
	replace mun="nepomuceno" if mun=="villa nepomuceno" & estado=="mg"

	replace mun="sao jose dos pinhais" if mun=="sao jose dos pinhaes" & estado=="pr"
	replace mun="cerro azul" if mun=="serro azul" & estado=="pr"
	replace mun="bocaiuva do sul" if mun=="bocayuva" & estado=="pr"
	replace mun="curitiba" if mun=="curityba" & estado=="pr"
	replace mun="piraquara" if mun=="deodoro" & estado=="pr"
	replace mun="entre rios do oeste" if mun=="entre rios" & estado=="pr"
	replace mun="pirai do sul" if mun=="pirahy" & estado=="pr"
	replace mun="sao joao do triunfo" if mun=="sao joao do triumpho" & estado=="pr"
	replace mun="tomazina" if mun=="thomazina" & estado=="pr"
	replace mun="almirante tamandare" if mun=="tamandare" & estado=="pr"
	replace mun="tibagi" if mun=="tibagy" & estado=="pr"
	replace mun="uniao da vitoria" if mun=="uniao da victoria" & estado=="pr"
	replace mun="campina grande do sul" if mun=="campina grande" & estado=="pr"
	replace mun="ipiranga" if mun=="ypiranga" & estado=="pr"
	replace mun="foz do iguacu" if mun=="foz do iguassu" & estado=="pr"
	replace mun="guaraquecaba" if mun=="guarakessaba" & estado=="pr"
	replace mun="irati" if mun=="iraty" & estado=="pr"
	replace mun="jaguariaiva" if mun=="jaguariahyva" & estado=="pr"
	replace mun="marumbi" if mun=="marumby" & estado=="pr"
	replace mun="rio branco do ivai" if mun=="rio branco" & estado=="pr"
	replace mun="imbituva" if mun=="santo antonio do imbituva" & estado=="pr"
	replace mun="sao mateus do sul" if mun=="sao matheus" & estado=="pr"
	replace mun="mallet" if mun=="sao pedro do mallet" & estado=="pr"
	*observation: Porto de Cima was later extinct (part of Morretes), so we need to update it!!!
	*observation 2: Conchas was later extinct (part of Ponta Grossa), so we need to update it!!!
	*observation 3: Assunguy de Cima was later extinct (part of Cerro Azul), so we need to update it!!!
	*observation 4: Palmyra was later extinct (part of Sao Joao do Triunfo), so we need to update it!!!
		
	replace mun="apiai" if mun=="apiahy" & estado=="sp"	
	replace mun="batatais" if mun=="batataes" & estado=="sp"	
	replace mun="braganca paulista" if mun=="braganca" & estado=="sp"		
	replace mun="jacarei" if mun=="jacarehy" & estado=="sp"	
	replace mun="jau" if mun=="jahu" & estado=="sp"	
	replace mun="jundiai" if mun=="jundiahy" & estado=="sp"	
	replace mun="lencois paulista" if mun=="lencoes" & estado=="sp"	
	replace mun="moji das cruzes" if mun=="mogy das cruzes" & estado=="sp"	
	replace mun="natividade da serra" if mun=="natividade" & estado=="sp"	
	replace mun="nazare paulista" if mun=="nazareth" & estado=="sp"	
	replace mun="paraibuna" if mun=="parahybuna" & estado=="sp"	
	replace mun="santana de parnaiba" if mun=="parnahyba" & estado=="sp"	
	replace mun="santa barbara doeste" if mun=="santa barbara" & estado=="sp"
	replace mun="sao bento do sapucai" if mun=="sao bento do sapucahy" & estado=="sp"
	replace mun="sarapui" if mun=="sarapuhy" & estado=="sp"
	replace mun="ibiuna" if mun=="una" & estado=="sp"
	replace mun="ilhabela" if mun=="villa bella" & estado=="sp"
	replace mun="eldorado" if mun=="xiririca" & estado=="sp"
	replace mun="tatui" if mun=="tatuhy" & estado=="sp"
	replace mun="bom sucesso de itarare" if mun=="bom sucesso" & estado=="sp"
	replace mun="monteiro lobato" if mun=="buquira" & estado=="sp"
	replace mun="aracoiaba da serra" if mun=="campo largo de sorocaba" & estado=="sp"
	replace mun="campos novos paulista" if mun=="campos novos do paranapanema" & estado=="sp"
	replace mun="capivari" if mun=="capivary" & estado=="sp"
	replace mun="guarei" if mun=="guarehy" & estado=="sp"
	replace mun="itapecerica da serra" if mun=="itapecerica" & estado=="sp"
	replace mun="mairipora" if mun=="juquery" & estado=="sp"
	replace mun="patrocinio paulista" if mun=="patrocinio do sapucahy" & estado=="sp"
	replace mun="redencao da serra" if mun=="redempcao" & estado=="sp"
	replace mun="bofete" if mun=="rio bonito" & estado=="sp"
	replace mun="aguas de santa barbara" if mun=="santa barbara do rio pardo" & estado=="sp"
	replace mun="sao bernardo do campo" if mun=="sao bernardo" & estado=="sp"
	replace mun="iporanga" if mun=="yporanga" & estado=="sp"
	replace mun="capao bonito" if mun=="capao bonito do paranapanema" & estado=="sp"
	replace mun="monte alegre do sul" if mun=="conceicao de monte alegre" & estado=="sp"
	replace mun="itapeva" if mun=="faxina" & estado=="sp"
	replace mun="mineiros do tiete" if mun=="mineiros" & estado=="sp"
	replace mun="pedreira" if mun=="pedreiras" & estado=="sp"
	replace mun="pilar do sul" if mun=="pilar" & estado=="sp"
	replace mun="piraju" if mun=="piraja" & estado=="sp"
	replace mun="sao jose do rio preto" if mun=="rio preto" & estado=="sp"
	replace mun="itai" if mun=="santo antonio da boa vista" & estado=="sp"
	replace mun="bocaina" if mun=="sao joao da bocaina" & estado=="sp"
	replace mun="tambau" if mun=="tambahu" & estado=="sp"
	replace mun="itu" if mun=="ytu" & estado=="sp"
	replace mun="boa esperanca do sul" if mun=="boa esperanca" & estado=="sp"
	replace mun="lins" if mun=="albuquerque lins" & estado=="sp"
	replace mun="amparo" if mun=="amaparo" & estado=="sp"
	replace mun="anhembi" if mun=="anhemby" & estado=="sp"
	replace mun="analandia" if mun=="annapolis" & estado=="sp"
	replace mun="avai" if mun=="avahy" & estado=="sp"
	replace mun="bariri" if mun=="bariry" & estado=="sp"
	replace mun="itapui" if mun=="bica de pedra" & estado=="sp"
	replace mun="brodosqui" if mun=="brodowski" & estado=="sp"
	replace mun="cachoeira paulista" if mun=="cachoeira" & estado=="sp"
	replace mun="ipaucu" if mun=="ipaussu" & estado=="sp"
	replace mun="itajobi" if mun=="itajoby" & estado=="sp"
	replace mun="laranjal paulista" if mun=="laranjal" & estado=="sp"
	replace mun="mogi guacu" if mun=="mogy guassu" & estado=="sp"
	replace mun="mojimirim" if mun=="mogy mirim" & estado=="sp"
	replace mun="monte azul paulista" if mun=="monte azul" & estado=="sp"
	replace mun="olimpia" if mun=="olympia" & estado=="sp"
	replace mun="santa cruz das palmeiras" if mun=="palmeiras" & estado=="sp"
	replace mun="penapolis" if mun=="pennapolis" & estado=="sp"
	replace mun="pirajui" if mun=="pirajuhy" & estado=="sp"
	replace mun="salesopolis" if mun=="sallesopolis" & estado=="sp"
	replace mun="salto grande" if mun=="salto grande do paranapanema" & estado=="sp"
	replace mun="cachoeira paulista" if mun=="cachoeira" & estado=="sp"
	replace mun="santa isabel" if mun=="santa izabel" & estado=="sp"
	replace mun="santa rosa de viterbo" if mun=="santa rosa" & estado=="sp"
	replace mun="sao joaquim da barra" if mun=="sao joaquim" & estado=="sp"
	replace mun="sao manuel" if mun=="sao manoel do paraiso" & estado=="sp"
	replace mun="tabapua" if mun=="tabapuau" & estado=="sp"	
	*observation: Santo Amaro was later extinct (part of Sao Paulo), so we need to update it!!!
	*observation 2: Pinheiros (part of Queluz) was later extinct, so we need to update it!!!
	*observation 3: Jatai was later extinct (part of Cachoeira Paulista), so we need to update it!!!
	
	*now we merge:
	merge 1:1 estado mun using "$workingdata/amc_historic.dta"	
	drop if _m==2
	
	*make the updates:
	replace amc1872="41AMC1872_1997005" if mun=="porto de cima" & estado=="pr"
	replace amc1872="41AMC1872_1997002" if mun=="conchas" & estado=="pr"
	replace amc1872="41AMC1872_1997001" if mun=="assunguy de cima" & estado=="pr"
	replace amc1872="41AMC1872_1997002" if mun=="palmyra" & estado=="pr"
	replace amc1872="35AMC1872_1997046" if mun=="santo amaro" & estado=="sp"
	replace amc1872="35AMC1872_1997035" if mun=="pinheiros" & estado=="sp"
	replace amc1872="35AMC1872_1997026" if mun=="jatahy" & estado=="sp"
	drop _m
	
	*collapse:
	collapse (rawsum) pop1920 (firstnm) area1872, by(amc1872)	
	
	*generate pop density:
	gen pop_density1920=pop1920/area1872
	keep amc1872 pop_density1920
	drop if amc1872==""
	
	*merge and save:
	merge 1:1 amc1872 using "$workingdata/density1872.dta"
	drop _m
	save "$workingdata/density1872.dta", replace

****************************************
*1920 AMC level: 1920, 1940            *
****************************************

*import 1920 data:
*(rows of pop1920.csv are identified by estado and mun; the recodes below rewrite
* mun so that the pair (estado, mun) matches the merge keys of amc_historic.dta)
import delim "$data/pop1920.csv", delim(";") case(preserve) clear		

	*change names for the merge:
	*every recode is restricted to one state (estado), because the same name can
	*appear in more than one state (e.g. "rio branco", "entre rios", "bocayuva")

	*--- names in estado=="mg" ---
	replace mun="bonfim" if mun=="bomfim" & estado=="mg"
	replace mun="cristina" if mun=="christina" & estado=="mg"
	replace mun="curvelo" if mun=="curvello" & estado=="mg"
	replace mun="mar de espanha" if mun=="mar de hespanha" & estado=="mg"
	replace mun="monte alegre de minas" if mun=="monte alegre" & estado=="mg"
	replace mun="rio pomba" if mun=="pomba" & estado=="mg"
	replace mun="conselheiro lafaiete" if mun=="queluz" & estado=="mg"
	replace mun="rio pardo de minas" if mun=="rio pardo" & estado=="mg"
	replace mun="aracuai" if mun=="arassuahy" & estado=="mg"
	replace mun="sao joao batista do gloria" if mun=="sao joao baptista" & estado=="mg"
	replace mun="sao joao del rei" if mun=="sao joao delrey" & estado=="mg"
	replace mun="araguari" if mun=="araguary" & estado=="mg"
	replace mun="aiuruoca" if mun=="ayuruoca" & estado=="mg"
	replace mun="baependi" if mun=="baependy" & estado=="mg"
	replace mun="bambui" if mun=="bambuhy" & estado=="mg"
	replace mun="bocaiuva" if mun=="bocayuva" & estado=="mg"
	replace mun="caete" if mun=="caethe" & estado=="mg"
	replace mun="cambui" if mun=="cambuhy" & estado=="mg"
	replace mun="campo belo" if mun=="campo bello" & estado=="mg"
	replace mun="andradas" if mun=="caracol" & estado=="mg"
	replace mun="monte carmelo" if mun=="monte carmello" & estado=="mg"
	replace mun="carmo do paranaiba" if mun=="carmo do paranahyba" & estado=="mg"
	replace mun="cataguases" if mun=="cataguazes" & estado=="mg"
	replace mun="dores do indaia" if mun=="dores do indaya" & estado=="mg"
	replace mun="entre rios de minas" if mun=="entre rios" & estado=="mg"
	replace mun="frutal" if mun=="fructal" & estado=="mg"
	replace mun="jacui" if mun=="jacuhy" & estado=="mg"
	replace mun="camanducaia" if mun=="jaguary" & estado=="mg"
	replace mun="manhuacu" if mun=="manhuassu" & estado=="mg"
	replace mun="santos dumont" if mun=="palmyra" & estado=="mg"
	replace mun="para de minas" if mun=="para" & estado=="mg"
	replace mun="pedralva" if mun=="pedra branca" & estado=="mg"
	replace mun="pitangui" if mun=="pitanguy" & estado=="mg"
	replace mun="piui" if mun=="piumhy" & estado=="mg"
	replace mun="santa rita do sapucai" if mun=="santa rita do sapucahy" & estado=="mg"
	replace mun="machado" if mun=="santo antonio do machado" & estado=="mg"
	replace mun="sao goncalo do sapucai" if mun=="sao goncalo do sapucahy" & estado=="mg"
	replace mun="alem paraiba" if mun=="alem parahyba" & estado=="mg"
	replace mun="teofilo otoni" if mun=="theophilo ottoni" & estado=="mg"
	replace mun="tres coracoes" if mun=="tres coracoes do rio verde" & estado=="mg"
	replace mun="belo horizonte" if mun=="bello horizonte" & estado=="mg"
	replace mun="monte azul" if mun=="boa vista do tremedal" & estado=="mg"
	replace mun="conceicao do mato dentro" if mun=="conceicao do serro" & estado=="mg"
	replace mun="boa esperanca" if mun=="dores da boa esperanca" & estado=="mg"
	replace mun="monte santo de minas" if mun=="monte santo" & estado=="mg"
	replace mun="patos de minas" if mun=="patos" & estado=="mg"
	replace mun="visconde do rio branco" if mun=="rio branco" & estado=="mg"
	replace mun="santa luzia" if mun=="santa luzia do rio das velhas" & estado=="mg"
	replace mun="muriae" if mun=="sao paulo do muriahe" & estado=="mg"
	replace mun="andrelandia" if mun=="turvo" & estado=="mg"
	replace mun="tupaciguara" if mun=="abadia do bom sucesso" & estado=="mg"
	replace mun="lambari" if mun=="aguas virtuosas" & estado=="mg"
	replace mun="aimores" if mun=="aymores" & estado=="mg"
	replace mun="campos gerais" if mun=="campos geraes" & estado=="mg"
	replace mun="capelinha" if mun=="capellinha" & estado=="mg"
	replace mun="eloi mendes" if mun=="eloy mendes" & estado=="mg"
	replace mun="estrela do sul" if mun=="estrella do sul" & estado=="mg"
	replace mun="fortaleza de minas" if mun=="fortaleza" & estado=="mg"
	replace mun="guarani" if mun=="guarany" & estado=="mg"
	replace mun="coracao de jesus" if mun=="inconfidencia" & estado=="mg"
	replace mun="ituiutaba" if mun=="ituyutaba" & estado=="mg"
	replace mun="mariana" if mun=="marianna" & estado=="mg"
	replace mun="paraguacu" if mun=="paraguassu" & estado=="mg"
	replace mun="pequi" if mun=="pequy" & estado=="mg"
	replace mun="ipanema" if mun=="rio jose pedro" & estado=="mg"
	replace mun="esmeraldas" if mun=="santa quiteria" & estado=="mg"
	replace mun="ferros" if mun=="santanna dos ferros" & estado=="mg"
	replace mun="sao gotardo" if mun=="sao gothardo" & estado=="mg"
	replace mun="botelhos" if mun=="sao jose dos botelhos" & estado=="mg"
	replace mun="eugenopolis" if mun=="sao manuel" & estado=="mg"
	replace mun="mutum" if mun=="sao manuel do mutum" & estado=="mg"
	replace mun="guanhaes" if mun=="sao miguel de guanhaes" & estado=="mg"
	replace mun="carmo de minas" if mun=="silvestre ferraz" & estado=="mg"
	replace mun="uberlandia" if mun=="uberabinha" & estado=="mg"
	replace mun="brasopolis" if mun=="villa braz" & estado=="mg"
	replace mun="brasilia de minas" if mun=="villa brazilia" & estado=="mg"
	replace mun="cambuquira" if mun=="villa de cambuqueira" & estado=="mg"
	replace mun="jequitinhonha" if mun=="villa jequitinhona" & estado=="mg"
	replace mun="nova lima" if mun=="villa nova de lima" & estado=="mg"
	replace mun="nova resende" if mun=="villa nova de rezende" & estado=="mg"
	replace mun="resende costa" if mun=="villa rezende costa" & estado=="mg"
	replace mun="rio espera" if mun=="villa rio espera" & estado=="mg"
	replace mun="nepomuceno" if mun=="villa nepomuceno" & estado=="mg"

	*--- names in estado=="pr" ---
	replace mun="sao jose dos pinhais" if mun=="sao jose dos pinhaes" & estado=="pr"
	replace mun="cerro azul" if mun=="serro azul" & estado=="pr"
	replace mun="bocaiuva do sul" if mun=="bocayuva" & estado=="pr"
	replace mun="curitiba" if mun=="curityba" & estado=="pr"
	replace mun="piraquara" if mun=="deodoro" & estado=="pr"
	replace mun="entre rios do oeste" if mun=="entre rios" & estado=="pr"
	replace mun="pirai do sul" if mun=="pirahy" & estado=="pr"
	replace mun="sao joao do triunfo" if mun=="sao joao do triumpho" & estado=="pr"
	replace mun="tomazina" if mun=="thomazina" & estado=="pr"
	replace mun="almirante tamandare" if mun=="tamandare" & estado=="pr"
	replace mun="tibagi" if mun=="tibagy" & estado=="pr"
	replace mun="uniao da vitoria" if mun=="uniao da victoria" & estado=="pr"
	replace mun="campina grande do sul" if mun=="campina grande" & estado=="pr"
	replace mun="ipiranga" if mun=="ypiranga" & estado=="pr"
	replace mun="foz do iguacu" if mun=="foz do iguassu" & estado=="pr"
	replace mun="guaraquecaba" if mun=="guarakessaba" & estado=="pr"
	replace mun="irati" if mun=="iraty" & estado=="pr"
	replace mun="jaguariaiva" if mun=="jaguariahyva" & estado=="pr"
	replace mun="marumbi" if mun=="marumby" & estado=="pr"
	replace mun="rio branco do ivai" if mun=="rio branco" & estado=="pr"
	replace mun="imbituva" if mun=="santo antonio do imbituva" & estado=="pr"
	replace mun="sao mateus do sul" if mun=="sao matheus" & estado=="pr"
	replace mun="mallet" if mun=="sao pedro do mallet" & estado=="pr"
	*the PR municipalities below are not renamed here; their AMC code is filled in by hand after the merge:
	*observation 1: Porto de Cima was later extinct (part of Morretes), so we need to update it!!!
	*observation 2: Conchas was later extinct (part of Ponta Grossa), so we need to update it!!!
	*observation 3: Assunguy de Cima was later extinct (part of Cerro Azul), so we need to update it!!!
	*observation 4: Palmyra was later extinct (part of Sao Joao do Triunfo), so we need to update it!!!
		
	*--- names in estado=="sp" ---
	replace mun="apiai" if mun=="apiahy" & estado=="sp"	
	replace mun="batatais" if mun=="batataes" & estado=="sp"	
	replace mun="braganca paulista" if mun=="braganca" & estado=="sp"		
	replace mun="jacarei" if mun=="jacarehy" & estado=="sp"	
	replace mun="jau" if mun=="jahu" & estado=="sp"	
	replace mun="jundiai" if mun=="jundiahy" & estado=="sp"	
	replace mun="lencois paulista" if mun=="lencoes" & estado=="sp"	
	replace mun="moji das cruzes" if mun=="mogy das cruzes" & estado=="sp"	
	replace mun="natividade da serra" if mun=="natividade" & estado=="sp"	
	replace mun="nazare paulista" if mun=="nazareth" & estado=="sp"	
	replace mun="paraibuna" if mun=="parahybuna" & estado=="sp"	
	replace mun="santana de parnaiba" if mun=="parnahyba" & estado=="sp"	
	replace mun="santa barbara doeste" if mun=="santa barbara" & estado=="sp"
	replace mun="sao bento do sapucai" if mun=="sao bento do sapucahy" & estado=="sp"
	replace mun="sarapui" if mun=="sarapuhy" & estado=="sp"
	replace mun="ibiuna" if mun=="una" & estado=="sp"
	replace mun="ilhabela" if mun=="villa bella" & estado=="sp"
	replace mun="eldorado" if mun=="xiririca" & estado=="sp"
	replace mun="tatui" if mun=="tatuhy" & estado=="sp"
	replace mun="bom sucesso de itarare" if mun=="bom sucesso" & estado=="sp"
	replace mun="monteiro lobato" if mun=="buquira" & estado=="sp"
	replace mun="aracoiaba da serra" if mun=="campo largo de sorocaba" & estado=="sp"
	replace mun="campos novos paulista" if mun=="campos novos do paranapanema" & estado=="sp"
	replace mun="capivari" if mun=="capivary" & estado=="sp"
	replace mun="guarei" if mun=="guarehy" & estado=="sp"
	replace mun="itapecerica da serra" if mun=="itapecerica" & estado=="sp"
	replace mun="mairipora" if mun=="juquery" & estado=="sp"
	replace mun="patrocinio paulista" if mun=="patrocinio do sapucahy" & estado=="sp"
	replace mun="redencao da serra" if mun=="redempcao" & estado=="sp"
	replace mun="bofete" if mun=="rio bonito" & estado=="sp"
	replace mun="aguas de santa barbara" if mun=="santa barbara do rio pardo" & estado=="sp"
	replace mun="sao bernardo do campo" if mun=="sao bernardo" & estado=="sp"
	replace mun="iporanga" if mun=="yporanga" & estado=="sp"
	replace mun="capao bonito" if mun=="capao bonito do paranapanema" & estado=="sp"
	replace mun="monte alegre do sul" if mun=="conceicao de monte alegre" & estado=="sp"
	replace mun="itapeva" if mun=="faxina" & estado=="sp"
	replace mun="mineiros do tiete" if mun=="mineiros" & estado=="sp"
	replace mun="pedreira" if mun=="pedreiras" & estado=="sp"
	replace mun="pilar do sul" if mun=="pilar" & estado=="sp"
	replace mun="piraju" if mun=="piraja" & estado=="sp"
	replace mun="sao jose do rio preto" if mun=="rio preto" & estado=="sp"
	replace mun="itai" if mun=="santo antonio da boa vista" & estado=="sp"
	replace mun="bocaina" if mun=="sao joao da bocaina" & estado=="sp"
	replace mun="tambau" if mun=="tambahu" & estado=="sp"
	replace mun="itu" if mun=="ytu" & estado=="sp"
	replace mun="boa esperanca do sul" if mun=="boa esperanca" & estado=="sp"
	replace mun="lins" if mun=="albuquerque lins" & estado=="sp"
	replace mun="amparo" if mun=="amaparo" & estado=="sp"
	replace mun="anhembi" if mun=="anhemby" & estado=="sp"
	replace mun="analandia" if mun=="annapolis" & estado=="sp"
	replace mun="avai" if mun=="avahy" & estado=="sp"
	replace mun="bariri" if mun=="bariry" & estado=="sp"
	replace mun="itapui" if mun=="bica de pedra" & estado=="sp"
	replace mun="brodosqui" if mun=="brodowski" & estado=="sp"
	replace mun="cachoeira paulista" if mun=="cachoeira" & estado=="sp"
	replace mun="ipaucu" if mun=="ipaussu" & estado=="sp"
	replace mun="itajobi" if mun=="itajoby" & estado=="sp"
	replace mun="laranjal paulista" if mun=="laranjal" & estado=="sp"
	replace mun="mogi guacu" if mun=="mogy guassu" & estado=="sp"
	replace mun="mojimirim" if mun=="mogy mirim" & estado=="sp"
	replace mun="monte azul paulista" if mun=="monte azul" & estado=="sp"
	replace mun="olimpia" if mun=="olympia" & estado=="sp"
	replace mun="santa cruz das palmeiras" if mun=="palmeiras" & estado=="sp"
	replace mun="penapolis" if mun=="pennapolis" & estado=="sp"
	replace mun="pirajui" if mun=="pirajuhy" & estado=="sp"
	replace mun="salesopolis" if mun=="sallesopolis" & estado=="sp"
	replace mun="salto grande" if mun=="salto grande do paranapanema" & estado=="sp"
	*(the next line repeats the "cachoeira" recode made a few lines above; it changes nothing the second time)
	replace mun="cachoeira paulista" if mun=="cachoeira" & estado=="sp"
	replace mun="santa isabel" if mun=="santa izabel" & estado=="sp"
	replace mun="santa rosa de viterbo" if mun=="santa rosa" & estado=="sp"
	replace mun="sao joaquim da barra" if mun=="sao joaquim" & estado=="sp"
	replace mun="sao manuel" if mun=="sao manoel do paraiso" & estado=="sp"
	replace mun="tabapua" if mun=="tabapuau" & estado=="sp"	
	*the SP municipalities below are not renamed here; their AMC code is filled in by hand after the merge:
	*observation 1: Santo Amaro was later extinct (part of Sao Paulo), so we need to update it!!!
	*observation 2: Pinheiros (part of Queluz) was later extinct, so we need to update it!!!
	*observation 3: Jatai (spelled "jatahy" in the data) was later extinct (part of Cachoeira Paulista), so we need to update it!!!
	
	*attach the AMC identifiers and areas by (estado, mun); rows that exist only in
	*amc_historic.dta bring no 1920 population, so they are discarded:
	merge 1:1 estado mun using "$workingdata/amc_historic.dta"
	drop if _merge==2
	drop _merge

	*municipalities that were later extinguished find no match above, so their
	*1920 AMC code is assigned by hand:
	replace amc1920 = "41AMC2097010" if estado=="pr" & mun=="porto de cima"
	replace amc1920 = "41AMC2097002" if estado=="pr" & mun=="conchas"
	replace amc1920 = "41AMC2097001" if estado=="pr" & mun=="assunguy de cima"
	replace amc1920 = "41AMC2097016" if estado=="pr" & mun=="palmyra"
	replace amc1920 = "35AMC2097090" if estado=="sp" & mun=="santo amaro"
	replace amc1920 = "354190" if estado=="sp" & mun=="pinheiros"
	replace amc1920 = "350860" if estado=="sp" & mun=="jatahy"

	*aggregate to the 1920 AMC: total population, first non-missing area:
	collapse (rawsum) pop1920 (firstnm) area1920, by(amc1920)

	*rows that never received an AMC code are dropped; density = population / area:
	drop if amc1920==""
	generate pop_density1920 = pop1920/area1920
	keep amc1920 pop_density1920

	*store the 1920-AMC density file:
	save "$workingdata/density1920.dta", replace
	
*import 1940 data:
*(rows of pop1940.csv are identified by estado and mun; the recodes below rewrite
* mun so that the pair (estado, mun) matches the merge keys of amc_historic.dta)
import delim "$data/pop1940.csv", delim(";") case(preserve) clear		

	*change names for the merge
	*--- names in estado=="pr" ---
	replace mun="foz do iguacu" if mun=="foz do iguassu" & estado=="pr"
	replace mun="mallet" if mun=="male" & estado=="pr"
	replace mun="wenceslau braz" if mun=="venceslau braz" & estado=="pr"
	replace mun="sao mateus do sul" if mun=="sao mateus" & estado=="pr"
	replace mun="sao jeronimo da serra" if mun=="sao jeronimo" & estado=="pr"
	replace mun="pirai do sul" if mun=="pirai" & estado=="pr"
	replace mun="bocaiuva do sul" if mun=="bocaiuva" & estado=="pr"
		
	*--- names in estado=="sp" ---
	replace mun="braganca paulista" if mun=="braganca" & estado=="sp"		
	replace mun="lencois paulista" if mun=="lencois" & estado=="sp"	
	replace mun="moji das cruzes" if mun=="mogi das cruzes" & estado=="sp"	
	replace mun="natividade da serra" if mun=="natividade" & estado=="sp"	
	replace mun="nazare paulista" if mun=="nazare" & estado=="sp"	
	replace mun="santana de parnaiba" if mun=="parnaiba" & estado=="sp"	
	replace mun="santa barbara doeste" if mun=="santa barbara" & estado=="sp"
	replace mun="ibiuna" if mun=="una" & estado=="sp"
	replace mun="eldorado" if mun=="xiririca" & estado=="sp"
	replace mun="itapecerica da serra" if mun=="itapecerica" & estado=="sp"
	replace mun="aguas de santa barbara" if mun=="santa barbara do rio pardo" & estado=="sp"
	replace mun="mineiros do tiete" if mun=="mineiros" & estado=="sp"
	replace mun="pilar do sul" if mun=="pilar" & estado=="sp"
	replace mun="sao jose do rio preto" if mun=="rio preto" & estado=="sp"
	replace mun="boa esperanca do sul" if mun=="boa esperanca" & estado=="sp"
	replace mun="cachoeira paulista" if mun=="cachoeira" & estado=="sp"
	replace mun="ipaucu" if mun=="ipaussu" & estado=="sp"
	replace mun="laranjal paulista" if mun=="laranjal" & estado=="sp"
	replace mun="mogi guacu" if mun=="mogi guassu" & estado=="sp"
	replace mun="mojimirim" if mun=="mogi mirim" & estado=="sp"
	replace mun="monte azul paulista" if mun=="monte azul" & estado=="sp"
	replace mun="santa cruz das palmeiras" if mun=="palmeiras" & estado=="sp"
	replace mun="santa rosa de viterbo" if mun=="santa rosa" & estado=="sp"
	replace mun="sao joaquim da barra" if mun=="sao joaquim" & estado=="sp"
	replace mun="sao manuel" if mun=="sao manoel" & estado=="sp"	
	replace mun="analandia" if mun=="anapolis" & estado=="sp"
	replace mun="sao jose do barreiro" if mun=="barreiro" & estado=="sp"
	replace mun="sao jose da bela vista" if mun=="bela vista" & estado=="sp"
	*("bocaiuva" is recoded differently per state: PR above, SP here)
	replace mun="macatuba" if mun=="bocaiuva" & estado=="sp"
	replace mun="aracoiaba da serra" if mun=="campo largo" & estado=="sp"
	replace mun="ilhabela" if mun=="formosa" & estado=="sp"
	replace mun="sao sebastiao da grama" if mun=="grama" & estado=="sp"
	replace mun="mairipora" if mun=="juqueri" & estado=="sp"
	replace mun="urupes" if mun=="mundo novo" & estado=="sp"
	replace mun="paraguacu paulista" if mun=="paraguassu" & estado=="sp"
	replace mun="patrocinio paulista" if mun=="patrocinio do sapucai" & estado=="sp"
	replace mun="espirito santo do pinhal" if mun=="pinhal" & estado=="sp"
	replace mun="anhembi" if mun=="piramboia" & estado=="sp"
	replace mun="miracatu" if mun=="prainha" & estado=="sp"
	replace mun="redencao da serra" if mun=="redencao" & estado=="sp"
	replace mun="santa rita do passa quatro" if mun=="santa rita" & estado=="sp"
	replace mun="sao luis do paraitinga" if mun=="sao luiz do paraitinga" & estado=="sp"
	replace mun="taquarituba" if mun=="taquari" & estado=="sp"
	replace mun="vargem grande do sul" if mun=="vargem grande" & estado=="sp"
	*observation: Pinheiros (part of Queluz) was later extinct, so it is not renamed here and we need to update its AMC code after the merge!!!
	
	*attach the AMC identifiers and areas by (estado, mun); rows that exist only in
	*amc_historic.dta bring no 1940 population, so they are discarded:
	merge 1:1 estado mun using "$workingdata/amc_historic.dta"
	drop if _merge==2
	drop _merge

	*Pinheiros finds no match above, so its 1920 AMC code is assigned by hand:
	replace amc1920 = "354190" if estado=="sp" & mun=="pinheiros"

	*aggregate to the 1920 AMC: total population, first non-missing area:
	collapse (rawsum) pop1940 (firstnm) area1920, by(amc1920)

	*rows that never received an AMC code are dropped; density = population / area:
	drop if amc1920==""
	generate pop_density1940 = pop1940/area1920
	keep amc1920 pop_density1940

	*add the 1940 density as a new column of the 1920-AMC file and overwrite it:
	merge 1:1 amc1920 using "$workingdata/density1920.dta", nogenerate
	save "$workingdata/density1920.dta", replace

****************************************
*1940 AMC level: 1940, 1950, 1960      *
****************************************

*import 1940 data:	
*(same pop1940.csv file and same recodes as in the 1920-AMC section; the data are
* reloaded here because this pass aggregates them by amc1940 instead)
import delim "$data/pop1940.csv", delim(";") case(preserve) clear	

	*change names for the merge
	*--- names in estado=="pr" ---
	replace mun="foz do iguacu" if mun=="foz do iguassu" & estado=="pr"
	replace mun="mallet" if mun=="male" & estado=="pr"
	replace mun="wenceslau braz" if mun=="venceslau braz" & estado=="pr"
	replace mun="sao mateus do sul" if mun=="sao mateus" & estado=="pr"
	replace mun="sao jeronimo da serra" if mun=="sao jeronimo" & estado=="pr"
	replace mun="pirai do sul" if mun=="pirai" & estado=="pr"
	replace mun="bocaiuva do sul" if mun=="bocaiuva" & estado=="pr"
		
	*--- names in estado=="sp" ---
	replace mun="braganca paulista" if mun=="braganca" & estado=="sp"		
	replace mun="lencois paulista" if mun=="lencois" & estado=="sp"	
	replace mun="moji das cruzes" if mun=="mogi das cruzes" & estado=="sp"	
	replace mun="natividade da serra" if mun=="natividade" & estado=="sp"	
	replace mun="nazare paulista" if mun=="nazare" & estado=="sp"	
	replace mun="santana de parnaiba" if mun=="parnaiba" & estado=="sp"	
	replace mun="santa barbara doeste" if mun=="santa barbara" & estado=="sp"
	replace mun="ibiuna" if mun=="una" & estado=="sp"
	replace mun="eldorado" if mun=="xiririca" & estado=="sp"
	replace mun="itapecerica da serra" if mun=="itapecerica" & estado=="sp"
	replace mun="aguas de santa barbara" if mun=="santa barbara do rio pardo" & estado=="sp"
	replace mun="mineiros do tiete" if mun=="mineiros" & estado=="sp"
	replace mun="pilar do sul" if mun=="pilar" & estado=="sp"
	replace mun="sao jose do rio preto" if mun=="rio preto" & estado=="sp"
	replace mun="boa esperanca do sul" if mun=="boa esperanca" & estado=="sp"
	replace mun="cachoeira paulista" if mun=="cachoeira" & estado=="sp"
	replace mun="ipaucu" if mun=="ipaussu" & estado=="sp"
	replace mun="laranjal paulista" if mun=="laranjal" & estado=="sp"
	replace mun="mogi guacu" if mun=="mogi guassu" & estado=="sp"
	replace mun="mojimirim" if mun=="mogi mirim" & estado=="sp"
	replace mun="monte azul paulista" if mun=="monte azul" & estado=="sp"
	replace mun="santa cruz das palmeiras" if mun=="palmeiras" & estado=="sp"
	replace mun="santa rosa de viterbo" if mun=="santa rosa" & estado=="sp"
	replace mun="sao joaquim da barra" if mun=="sao joaquim" & estado=="sp"
	replace mun="sao manuel" if mun=="sao manoel" & estado=="sp"	
	replace mun="analandia" if mun=="anapolis" & estado=="sp"
	replace mun="sao jose do barreiro" if mun=="barreiro" & estado=="sp"
	replace mun="sao jose da bela vista" if mun=="bela vista" & estado=="sp"
	*("bocaiuva" is recoded differently per state: PR above, SP here)
	replace mun="macatuba" if mun=="bocaiuva" & estado=="sp"
	replace mun="aracoiaba da serra" if mun=="campo largo" & estado=="sp"
	replace mun="ilhabela" if mun=="formosa" & estado=="sp"
	replace mun="sao sebastiao da grama" if mun=="grama" & estado=="sp"
	replace mun="mairipora" if mun=="juqueri" & estado=="sp"
	replace mun="urupes" if mun=="mundo novo" & estado=="sp"
	replace mun="paraguacu paulista" if mun=="paraguassu" & estado=="sp"
	replace mun="patrocinio paulista" if mun=="patrocinio do sapucai" & estado=="sp"
	replace mun="espirito santo do pinhal" if mun=="pinhal" & estado=="sp"
	replace mun="anhembi" if mun=="piramboia" & estado=="sp"
	replace mun="miracatu" if mun=="prainha" & estado=="sp"
	replace mun="redencao da serra" if mun=="redencao" & estado=="sp"
	replace mun="santa rita do passa quatro" if mun=="santa rita" & estado=="sp"
	replace mun="sao luis do paraitinga" if mun=="sao luiz do paraitinga" & estado=="sp"
	replace mun="taquarituba" if mun=="taquari" & estado=="sp"
	replace mun="vargem grande do sul" if mun=="vargem grande" & estado=="sp"
	*observation: Pinheiros (part of Queluz) was later extinct, so it is not renamed here and we need to update its AMC code after the merge!!!
	
	*attach the AMC identifiers and areas by (estado, mun); rows that exist only in
	*amc_historic.dta bring no 1940 population, so they are discarded:
	merge 1:1 estado mun using "$workingdata/amc_historic.dta"
	drop if _merge==2
	drop _merge

	*Pinheiros finds no match above, so its 1940 AMC code is assigned by hand:
	replace amc1940 = "354190" if estado=="sp" & mun=="pinheiros"

	*aggregate to the 1940 AMC: total population, first non-missing area:
	collapse (rawsum) pop1940 (firstnm) area1940, by(amc1940)

	*rows that never received an AMC code are dropped; density = population / area:
	drop if amc1940==""
	generate pop_density1940 = pop1940/area1940
	keep amc1940 pop_density1940

	*store the 1940-AMC density file:
	save "$workingdata/density1940.dta", replace
	
*import 1950 data:
*(rows of pop1950.csv are identified by estado and mun; the result of this section
* is a pop_density1950 column added to density1940.dta)
import delim "$data/pop1950.csv", delim(";") case(preserve) clear

	*change names for the merge, so that (estado, mun) matches the keys of amc_historic.dta:
	*--- names in estado=="pr" ---
	replace mun="sao jeronimo da serra" if mun=="araiporanga" & estado=="pr"
	replace mun="jundiai do sul" if mun=="cinzas" & estado=="pr"
	replace mun="almirante tamandare" if mun=="timoneira" & estado=="pr"

	*--- names in estado=="sp" ---
	replace mun="mogi guacu" if mun=="mogiguacu" & estado=="sp"
	replace mun="mojimirim" if mun=="mogimirim" & estado=="sp"
	replace mun="moji das cruzes" if mun=="mogi das cruzes" & estado=="sp"
	replace mun="espirito santo do pinhal" if mun=="pinhal" & estado=="sp"
	replace mun="aguas de santa barbara" if mun=="santa barbara do rio pardo" & estado=="sp"
	replace mun="sao jose do barreiro" if mun=="barreiro" & estado=="sp"
	replace mun="tupi paulista" if mun=="gracianopolis" & estado=="sp"

	*now we merge:
	*(forward slash in the path, as everywhere else in this do-file: a backslash
	* separator only works on Windows)
	merge 1:1 estado mun using "$workingdata/amc_historic.dta"
	*rows that exist only in amc_historic.dta bring no 1950 population:
	drop if _m==2

	*collapse to the 1940 AMC (total population, first non-missing area);
	*this also discards _merge:
	collapse (rawsum) pop1950 (firstnm) area1940, by(amc1940)

	*generate pop density:
	gen pop_density1950=pop1950/area1940
	keep amc1940 pop_density1950
	*rows that never received an AMC code:
	drop if amc1940==""

	*merge and save (adds the 1950 column to the 1940-AMC density file):
	merge 1:1 amc1940 using "$workingdata/density1940.dta"
	drop _m
	save "$workingdata/density1940.dta", replace
	
*import 1960 population:
*(rows of pop1960.csv are identified by estado and mun; the recodes below rewrite
* mun so that the pair (estado, mun) matches the merge keys of amc_historic.dta)
import delim "$data/pop1960.csv", delim(";") case(preserve) clear

	*change names in PR:
	replace mun="marilandia do sul" if mun=="araruva" & estado=="pr"
	replace mun="candido de abreu" if mun=="candido abreu" & estado=="pr"
	replace mun="cruzeiro do oeste" if mun=="cruzeiro doeste" & estado=="pr"
	replace mun="santo antonio do sudoeste" if mun=="santo antonio" & estado=="pr"
	replace mun="santo antonio da platina" if mun=="santo antonio do platina" & estado=="pr"
	replace mun="marilandia do sul" if mun=="arauva" & estado=="pr"
	replace mun="wenceslau braz" if mun=="venceslau braz" & estado=="pr"
	replace mun="sao sebastiao da amoreira" if mun=="amoreira" & estado=="pr"
	replace mun="quatigua" if mun=="quatiga" & estado=="pr"
	replace mun="santa cruz de monte castelo" if mun=="santa cruz do monte castelo" & estado=="pr"
	replace mun="itaguaje" if mun=="itaquaje" & estado=="pr"
	replace mun="manoel ribas" if mun=="manuel ribas" & estado=="pr"
	replace mun="sao jorge do ivai" if mun=="sao jorge" & estado=="pr"
	replace mun="wenceslau braz" if mun=="venceslau bras" & estado=="pr"

	*change names in SP:
	replace mun="alto alegre" if mun=="alto alege" & estado=="sp"
	replace mun="iracemapolis" if mun=="iracemopolis" & estado=="sp"
	replace mun="mogi guacu" if mun=="mogiguacu" & estado=="sp"
	replace mun="pariqueraacu" if mun=="pariquera acu" & estado=="sp"
	replace mun="pirapozinho" if mun=="pirapozinha" & estado=="sp"
	replace mun="paraibuna" if mun=="praibuna" & estado=="sp"
	replace mun="riversul" if mun=="ribeirao vermelho do sul" & estado=="sp"
	replace mun="santa gertrudes" if mun=="santa gertrurdes" & estado=="sp"
	replace mun="santo anastacio" if mun=="santo anstacio" & estado=="sp"
	replace mun="aguas de santa barbara" if mun=="santa barbara do rio pardo" & estado=="sp"
	replace mun="mojimirim" if mun=="mogimirim" & estado=="sp"
	replace mun="moji das cruzes" if mun=="mogi das cruzes" & estado=="sp"
	replace mun="salmourao" if mun=="salmorao" & estado=="sp"
	replace mun="espirito santo do pinhal" if mun=="pinhal" & estado=="sp"
	replace mun="bady bassitt" if mun=="borboleta" & estado=="sp"
	replace mun="florinia" if mun=="florinea" & estado=="sp"
	replace mun="cristais paulista" if mun=="guapua" & estado=="sp"
	replace mun="luiziania" if mun=="luisiania" & estado=="sp"
	replace mun="sud mennucci" if mun=="sud menucci" & estado=="sp"
	replace mun="aguas da prata" if mun=="aguas de prata" & estado=="sp"
	replace mun="areiopolis" if mun=="araiopolis" & estado=="sp"
	replace mun="brodosqui" if mun=="brodoqui" & estado=="sp"
	replace mun="guaicara" if mun=="gauicara" & estado=="sp"
	replace mun="itapecerica da serra" if mun=="itapacerica da serra" & estado=="sp"
	replace mun="nuporanga" if mun=="muporanga" & estado=="sp"
	replace mun="potirendaba" if mun=="potirandaba" & estado=="sp"
	replace mun="santana de parnaiba" if mun=="santana do parnaiba" & estado=="sp"
	replace mun="santo antonio de posse" if mun=="santo antonio da posse" & estado=="sp"
	replace mun="sao sebastiao da grama" if mun=="sao sebastiao do grama" & estado=="sp"
	replace mun="taiacu" if mun=="talacu" & estado=="sp"
	replace mun="vista alegre do alto" if mun=="viata alegre do alto" & estado=="sp"

	*change names in MG:
	*(this list used to contain two recodes written backwards, "ipuiuna" -> "ipuinha"
	* and "lagoa da prata" -> "lagoa da praia", which the last two lines of the list
	* then reversed; the backwards lines are removed, the final names are unchanged)
	replace mun="alto alegre" if mun=="alto alerge" & estado=="mg"
	replace mun="estrela do indaia" if mun=="estrela do india" & estado=="mg"
	replace mun="itapagipe" if mun=="itapagire" & estado=="mg"
	replace mun="joaima" if mun=="joaiha" & estado=="mg"
	replace mun="madre de deus de minas" if mun=="madre deus de minas" & estado=="mg"
	replace mun="minduri" if mun=="minouri" & estado=="mg"
	replace mun="novo cruzeiro" if mun=="novo curzeiro" & estado=="mg"
	replace mun="varzea da palma" if mun=="varzea do palma" & estado=="mg"
	replace mun="alto jequitiba" if mun=="presidente soares" & estado=="mg"
	replace mun="brasilia de minas" if mun=="brasilia" & estado=="mg"
	replace mun="brasopolis" if mun=="brazopolis" & estado=="mg"
	replace mun="carvalhopolis" if mun=="cana do reino" & estado=="mg"
	replace mun="conselheiro lafaiete" if mun=="conselheiro lafaiette" & estado=="mg"
	replace mun="sao roque de minas" if mun=="guia lopes" & estado=="mg"
	replace mun="ipuiuna" if mun=="ipuinha" & estado=="mg"
	replace mun="lagoa da prata" if mun=="lagoa da praia" & estado=="mg"
	
	*attach the AMC identifiers and areas by (estado, mun) and keep matched rows only:
	merge 1:1 estado mun using "$workingdata/amc_historic.dta"
	keep if _merge==3

	*aggregate to the 1940 AMC: total population, first non-missing area:
	collapse (rawsum) pop1960 (firstnm) area1940, by(amc1940)

	*rows without an AMC code are dropped; density = population / area:
	drop if amc1940==""
	generate pop_density1960 = pop1960/area1940
	keep amc1940 pop_density1960

	*add the 1960 density as a new column of the 1940-AMC file and overwrite it:
	merge 1:1 amc1940 using "$workingdata/density1940.dta", nogenerate
	save "$workingdata/density1940.dta", replace
	
****************************************
*1960 AMC level: 1960, 1970            *
****************************************

*load the 1960 municipality population file (semicolon-delimited, variable names keep their case):
import delimited "$data/pop1960.csv", delimiters(";") case(preserve) clear

	*harmonize municipality names in PR (every rule below is restricted to estado=="pr");
	*source spellings that map to the same target name are handled in a single rule:
	replace mun = "marilandia do sul" if inlist(mun, "araruva", "arauva") & estado == "pr"
	replace mun = "candido de abreu" if mun == "candido abreu" & estado == "pr"
	replace mun = "cruzeiro do oeste" if mun == "cruzeiro doeste" & estado == "pr"
	replace mun = "santo antonio do sudoeste" if mun == "santo antonio" & estado == "pr"
	replace mun = "santo antonio da platina" if mun == "santo antonio do platina" & estado == "pr"
	replace mun = "wenceslau braz" if inlist(mun, "venceslau braz", "venceslau bras") & estado == "pr"
	replace mun = "sao sebastiao da amoreira" if mun == "amoreira" & estado == "pr"
	replace mun = "quatigua" if mun == "quatiga" & estado == "pr"
	replace mun = "santa cruz de monte castelo" if mun == "santa cruz do monte castelo" & estado == "pr"
	replace mun = "itaguaje" if mun == "itaquaje" & estado == "pr"
	replace mun = "manoel ribas" if mun == "manuel ribas" & estado == "pr"
	replace mun = "sao jorge do ivai" if mun == "sao jorge" & estado == "pr"
	
	*harmonize municipality names in SP (every rule below is restricted to estado=="sp").
	*each rule rewrites one exact source spelling; no target name is the source of another
	*rule, so the rules do not interact:
	replace mun = "alto alegre" if mun == "alto alege" & estado == "sp"
	replace mun = "iracemapolis" if mun == "iracemopolis" & estado == "sp"
	replace mun = "mogi guacu" if mun == "mogiguacu" & estado == "sp"
	replace mun = "pariqueraacu" if mun == "pariquera acu" & estado == "sp"
	replace mun = "pirapozinho" if mun == "pirapozinha" & estado == "sp"
	replace mun = "paraibuna" if mun == "praibuna" & estado == "sp"
	replace mun = "riversul" if mun == "ribeirao vermelho do sul" & estado == "sp"
	replace mun = "santa gertrudes" if mun == "santa gertrurdes" & estado == "sp"
	replace mun = "santo anastacio" if mun == "santo anstacio" & estado == "sp"
	replace mun = "aguas de santa barbara" if mun == "santa barbara do rio pardo" & estado == "sp"
	replace mun = "mojimirim" if mun == "mogimirim" & estado == "sp"
	replace mun = "moji das cruzes" if mun == "mogi das cruzes" & estado == "sp"
	replace mun = "salmourao" if mun == "salmorao" & estado == "sp"
	replace mun = "espirito santo do pinhal" if mun == "pinhal" & estado == "sp"
	replace mun = "bady bassitt" if mun == "borboleta" & estado == "sp"
	replace mun = "florinia" if mun == "florinea" & estado == "sp"
	replace mun = "cristais paulista" if mun == "guapua" & estado == "sp"
	replace mun = "luiziania" if mun == "luisiania" & estado == "sp"
	replace mun = "sud mennucci" if mun == "sud menucci" & estado == "sp"
	replace mun = "aguas da prata" if mun == "aguas de prata" & estado == "sp"
	replace mun = "areiopolis" if mun == "araiopolis" & estado == "sp"
	replace mun = "brodosqui" if mun == "brodoqui" & estado == "sp"
	replace mun = "guaicara" if mun == "gauicara" & estado == "sp"
	replace mun = "itapecerica da serra" if mun == "itapacerica da serra" & estado == "sp"
	replace mun = "nuporanga" if mun == "muporanga" & estado == "sp"
	replace mun = "potirendaba" if mun == "potirandaba" & estado == "sp"
	replace mun = "santana de parnaiba" if mun == "santana do parnaiba" & estado == "sp"
	replace mun = "santo antonio de posse" if mun == "santo antonio da posse" & estado == "sp"
	replace mun = "sao sebastiao da grama" if mun == "sao sebastiao do grama" & estado == "sp"
	replace mun = "taiacu" if mun == "talacu" & estado == "sp"
	replace mun = "vista alegre do alto" if mun == "viata alegre do alto" & estado == "sp"
	
	*harmonize municipality names in MG (every rule below is restricted to estado=="mg").
	*this list used to rename "ipuiuna" -> "ipuinha" and "lagoa da prata" -> "lagoa da praia"
	*and then reverse both a few lines later; only the second legs (the last two rules) are
	*kept. the result is unchanged: both spellings still end up as "ipuiuna" and
	*"lagoa da prata", without passing through the misspelled form.
	replace mun="alto alegre" if mun=="alto alerge" & estado=="mg"
	replace mun="estrela do indaia" if mun=="estrela do india" & estado=="mg"
	replace mun="itapagipe" if mun=="itapagire" & estado=="mg"
	replace mun="joaima" if mun=="joaiha" & estado=="mg"
	replace mun="madre de deus de minas" if mun=="madre deus de minas" & estado=="mg"
	replace mun="minduri" if mun=="minouri" & estado=="mg"
	replace mun="novo cruzeiro" if mun=="novo curzeiro" & estado=="mg"
	replace mun="varzea da palma" if mun=="varzea do palma" & estado=="mg"
	replace mun="alto jequitiba" if mun=="presidente soares" & estado=="mg"
	replace mun="brasilia de minas" if mun=="brasilia" & estado=="mg"
	replace mun="brasopolis" if mun=="brazopolis" & estado=="mg"
	replace mun="carvalhopolis" if mun=="cana do reino" & estado=="mg"
	replace mun="conselheiro lafaiete" if mun=="conselheiro lafaiette" & estado=="mg"
	replace mun="sao roque de minas" if mun=="guia lopes" & estado=="mg"
	replace mun="ipuiuna" if mun=="ipuinha" & estado=="mg"
	replace mun="lagoa da prata" if mun=="lagoa da praia" & estado=="mg"
	
	*match each municipality (by state and name) to amc_historic.dta and keep matched rows only:
	merge 1:1 estado mun using "$workingdata/amc_historic.dta"
	keep if _merge == 3

	*aggregate to 1960 AMCs: unweighted population total and the first non-missing area per AMC;
	*rows without an AMC code form the "" group, which is discarded:
	collapse (rawsum) pop1960 (firstnm) area1960, by(amc1960)
	keep if amc1960 != ""

	*population density = population / area:
	generate pop_density1960 = pop1960 / area1960
	keep amc1960 pop_density1960

	*store the 1960-AMC density file:
	save "$workingdata/density1960.dta", replace
	
*load the 1970 municipality population:
import delimited "$data/pop_mun.csv", delimiters(";") case(preserve) clear
	keep cod_municipio pop1970

	*build the merge key UFMUN: the numeric municipality code with its last digit dropped
	rename cod_municipio UFMUN
	destring UFMUN, replace
	replace UFMUN = floor(UFMUN / 10)

	*merge with amc_historic.dta; unmatched rows from either side are kept at this point
	*(only the merge indicator is removed):
	merge 1:1 UFMUN using "$workingdata/amc_historic.dta"
	drop _merge

	*aggregate to 1960 AMCs: unweighted population total and the first non-missing area per AMC;
	*rows without an AMC code form the "" group, which is discarded:
	collapse (rawsum) pop1970 (firstnm) area1960, by(amc1960)
	keep if amc1960 != ""

	*population density = population / area:
	generate pop_density1970 = pop1970 / area1960
	keep amc1960 pop_density1970

	*add the 1970 density to the 1960-AMC file and overwrite it:
	merge 1:1 amc1960 using "$workingdata/density1960.dta"
	drop _merge
	save "$workingdata/density1960.dta", replace
	
*************************
*NOW APPEND ALL DATASETS
*************************

*start the stacked file from the 1960-AMC densities, with a generic AMC id and the vintage year:
use "$workingdata/density1960.dta", clear
	rename amc1960 amc
	generate amc_year = 1960
	save "$workingdata/pastpopdensity.dta", replace

*add the earlier AMC vintages one at a time; each newly loaded vintage is the master,
*so the file built so far is appended below it:
foreach vintage in 1940 1920 1872 {
	use "$workingdata/density`vintage'.dta", clear
	rename amc`vintage' amc
	generate amc_year = `vintage'
	append using "$workingdata/pastpopdensity.dta"
	save "$workingdata/pastpopdensity.dta", replace
}

*label every density variable, then compress and store the final file:
foreach yr in 1872 1890 1900 1920 1940 1950 1960 1970 {
	label variable pop_density`yr' "Population density, `yr'"
}
label variable amc "AMC"
label variable amc_year "AMC year"
compress
save "$workingdata/pastpopdensity.dta", replace

*delete the per-vintage intermediate files:
foreach vintage in 1960 1940 1920 1872 {
	erase "$workingdata/density`vintage'.dta"
}

********************************************************************************
*6) PREPARE BOLSONARO VOTE SHARE
********************************************************************************

import delimited "$data/votacao_candidato_munzona_2018_BR.csv", delimiters(";") clear

*election round from the election date: 07/10/2018 is round 1, 28/10/2018 is round 2
*(any other date leaves round missing):
generate round = 1 if dt_eleicao == "07/10/2018"
replace round = 2 if dt_eleicao == "28/10/2018"

	*per round: nominal votes of every row, and nominal votes of candidate number 17 only:
	forvalues r = 1/2 {
		generate vote_round`r' = qt_votos_nominais if round == `r'
		generate vote_bolsonaro_round`r' = qt_votos_nominais if round == `r' & nr_candidato == 17
	}

*sum the vote counts within municipality and rename the id to tse_mun:
collapse (rawsum) vote_*, by(cd_municipio)
rename cd_municipio tse_mun

	*share = candidate-17 votes / all nominal votes, separately for each round:
	forvalues r = 1/2 {
		generate sh_bolsonaro_round`r' = vote_bolsonaro_round`r' / vote_round`r'
	}

*keep the shares and merge with the TSE crosswalk file, retaining matched municipalities only:
keep tse_mun sh_bolsonaro_*
merge 1:1 tse_mun using "$data/crosswalk_tse.dta"
keep if _merge == 3
drop _merge

*label and save:
label variable sh_bolsonaro_round1 "Share of Bolsonaro vote (first round)"
label variable sh_bolsonaro_round2 "Share of Bolsonaro vote (second round)"
save "$workingdata/frontier_vars.dta", replace

********************************************************************************
*7) PREPARE NUMBER OF SECTORS BY MUNICIPALITY
********************************************************************************

use "$data/censo10.dta", clear

*sample restriction on the activity code: not system-missing and at least 10000
*(this combines the two original filters: "employed", i.e. non-missing and positive,
*and "exclude agriculture and mining", i.e. codes below 10000):
keep if ativ2010 != . & ativ2010 >= 10000

*sector codes at four levels of aggregation, obtained by dropping trailing digits:
generate ativ_divisao = floor(ativ2010 / 1000)
generate ativ_grupo = floor(ativ2010 / 100)
generate ativ_classe = floor(ativ2010 / 10)
generate ativ_subclasse = ativ2010

*division 84 (direct government) is left out:
drop if ativ_divisao == 84

*count the distinct sector codes in each municipality, one level at a time:
rename munic cod_municipio_6dt
foreach level in subclasse classe grupo divisao {
	sort cod_municipio_6dt ativ_`level'
	*keep the code on the first row of every municipality-sector pair and blank out repeats
	generate first_code = ativ_`level'
	replace first_code = . if cod_municipio_6dt == cod_municipio_6dt[_n-1] & ativ_`level' == ativ_`level'[_n-1]
	*rank the remaining codes within group; the largest rank is taken in the collapse below
	*NOTE(review): the grouping variable here is codmun, which is not created in this section;
	*presumably it is a municipality id already present in censo10.dta - confirm that it
	*identifies the same units as cod_municipio_6dt
	bysort codmun: egen nr_`level' = rank(first_code), field
	drop first_code
}

*one row per municipality, holding the maximum of each nr_* variable:
collapse (max) nr_subclasse nr_classe nr_grupo nr_divisao [aw=peso_pess], by(cod_municipio_6dt)

*label and save:
foreach level in subclasse classe grupo divisao {
	label variable nr_`level' "Number of sectors in municipality, CNAE 2.0 `level', 2010"
}
compress
save "$workingdata/sectors2010.dta", replace

********************************************************************************
*8) PREPARE NATIONAL MIGRATION FLOWS
********************************************************************************

*import the migration flows:
import delim "$data/migration.csv", delim(";") case(preserve) clear	

*the running sums below address rows by position (migration[_n+`i'] is the row `i' places
*further down), so the rows must be in chronological order; sort explicitly instead of
*relying on the order of the csv file:
sort year

*now we generate the time as endpoint:
*migration_tot`i' adds up migration over the current row and the next `i' rows
*(assumes one row per year with no gaps in the series - TODO confirm against the csv).
*rows closer than `i' rows to the end of the data get a missing total, because the
*subscript runs past the last observation:
gen migration_tot0=migration
forvalues i=1/21 {
local j=`i'-1
gen migration_tot`i' = migration_tot`j'+migration[_n+`i']
}

*and reshape to one row per year x time_as_endpoint (0 to 21):
drop migration
reshape long migration_tot, i(year) j(time_as_endpoint)

*and save:
la var migration_tot "Cumulative migration in years as endpoint"
compress
save "$workingdata/cum_migration.dta", replace

********************************************************************************
*9) PREPARE THE DATASET OF THE NEIGHBORING MUNICIPALITIES
********************************************************************************	
	
*population data (every column read as string): keep the 2010 totals under generic names
import delimited "$data/pop_mun.csv", delimiters(";") case(preserve) stringcols(_all) clear
keep cod_municipio pop2010 pop_urban2010
rename (pop2010 pop_urban2010) (pop pop_urban)

	*start the auxiliary municipality file:
	save "$workingdata/mun_aux.dta", replace
	
*municipality areas (every column read as string):
import delimited "$data/mun_area.csv", delimiters(";") case(preserve) stringcols(_all) clear

	*keep the code and the area under the names used in the auxiliary file, drop rows with no code:
	rename (CD_GCMUN AR_MUN_2016) (cod_municipio mun_area)
	keep cod_municipio mun_area
	keep if cod_municipio != ""

	*add to the auxiliary file, retaining municipalities present in both:
	merge 1:1 cod_municipio using "$workingdata/mun_aux.dta"
	keep if _merge == 3
	drop _merge

	*overwrite the auxiliary file:
	save "$workingdata/mun_aux.dta", replace
	
*gdp data (every column read as string):
import delimited "$data/gdp.csv", delimiters(";") case(preserve) stringcols(_all) clear

	*drop rows with no municipality code, then add to the auxiliary file,
	*retaining municipalities present in both:
	keep if cod_municipio != ""
	merge 1:1 cod_municipio using "$workingdata/mun_aux.dta"
	keep if _merge == 3
	drop _merge

	*overwrite the auxiliary file:
	save "$workingdata/mun_aux.dta", replace
	
*shapefile attribute table (every column read as string):
import delimited "$data/mun_shapefile.csv", delimiters(";") case(preserve) stringcols(_all) clear

	*drop the row id and the municipality name, rename the code, and convert the
	*coordinate / geography columns to numeric:
	drop ID NM_MUNICIP
	rename CD_GEOCODM cod_municipio
	destring xcoord ycoord alt_mean tri_mean coffee_mean maize_mean sugar_mean malaria_mean latosol acrisol terraroxa, replace

	*add to the auxiliary file, retaining municipalities present in both:
	merge 1:1 cod_municipio using "$workingdata/mun_aux.dta"
	keep if _merge == 3
	drop _merge

	*overwrite the auxiliary file:
	save "$workingdata/mun_aux.dta", replace
	
*municipalities with a river: every code listed in mun_rivers.csv gets river = 1
import delimited "$data/mun_rivers.csv", delimiters(";") case(preserve) stringcols(_all) clear
rename CD_GEOCODM cod_municipio
keep cod_municipio
generate river = 1

	*combine with the auxiliary file: river rows without a match there are discarded,
	*and auxiliary rows that are not in the river list get river = 0
	merge 1:1 cod_municipio using "$workingdata/mun_aux.dta"
	drop if _merge == 1
	drop _merge
	replace river = 0 if river != 1

*fold Estiva Gerbi into Mogi-Guacu by giving both the same code:
replace cod_municipio = "3530706" if cod_municipio == "3557303"

	*the population, area and value-added columns were read as strings; make them numeric:
	destring pop pop_urban mun_area gdp total_va ag_va man_va serv_va gov_va, replace

*one row per code: area-weighted means for the coordinate / geography variables,
*unweighted sums for the totals, and the maximum of the river indicator:
collapse (mean) xcoord ycoord alt_mean tri_mean coffee_mean maize_mean sugar_mean malaria_mean latosol acrisol terraroxa ///
	(rawsum) pop pop_urban mun_area gdp total_va ag_va man_va serv_va gov_va ///
	(max) river [aw=mun_area], by(cod_municipio)

	*overwrite the auxiliary file:
	save "$workingdata/mun_aux.dta", replace
	
*list of neighboring municipalities (every column read as string); all start with rail_access = 0
import delimited "$data/mun_neighboring.csv", delimiters(";") case(preserve) stringcols(_all) clear
rename CD_GEOCODM cod_municipio
keep cod_municipio
generate rail_access = 0

	*attach the auxiliary municipality variables, retaining neighbors found there:
	merge 1:1 cod_municipio using "$workingdata/mun_aux.dta"
	keep if _merge == 3
	drop _merge

*stack the sample municipalities underneath, restricted to the variables listed:
append using "$workingdata/mun_data.dta", keep(cod_municipio xcoord ycoord alt_mean tri_mean ///
	coffee_mean maize_mean sugar_mean malaria_mean latosol acrisol terraroxa ///
	pop pop_urban mun_area gdp total_va ag_va man_va serv_va gov_va river ///
	endpoint dist_to_luz time_as_endpoint)

	*drop out of sample (rows with endpoint equal to 1 or dist_to_luz equal to 0),
	*then drop the two variables used for that filter:
	drop if endpoint == 1 | dist_to_luz == 0
	drop endpoint dist_to_luz

	*rows that came from the sample dataset have no rail_access value yet; set it to 1:
	replace rail_access = 1 if missing(rail_access)
		tabulate rail_access

*save dataset:
save "$workingdata/mun_data_neighboring.dta", replace

*delete the working files that analysis.do does not use:
foreach leftover in amc dist_aux mun_aux next_dist rail_stations {
	erase "$workingdata/`leftover'.dta"
}



